[clang] [HLSL] Appropriately set function attribute optnone (PR #125937)

S. Bharadwaj Yadavalli via cfe-commits cfe-commits at lists.llvm.org
Tue Feb 11 08:25:16 PST 2025


https://github.com/bharadwajy updated https://github.com/llvm/llvm-project/pull/125937

>From 63a728dd7f5efff32a2f90608adfdc13d540f34f Mon Sep 17 00:00:00 2001
From: "S. Bharadwaj Yadavalli" <Bharadwaj.Yadavalli at microsoft.com>
Date: Tue, 4 Feb 2025 12:48:09 -0500
Subject: [PATCH 1/4] [HLSL] Set function optnone attribute appropriately

When optimization is disabled, set the optnone attribute
  - for all module functions when targeting Library shaders
  - only for the entry function when targeting non-Library shaders

Update tests in accordance with the change.
---
 clang/lib/CodeGen/CGHLSLRuntime.cpp           |  10 +
 .../CodeGenHLSL/GlobalConstructorLib.hlsl     |   8 +-
 clang/test/CodeGenHLSL/GlobalDestructors.hlsl |   4 +-
 .../test/CodeGenHLSL/inline-constructors.hlsl |  16 +-
 clang/test/CodeGenHLSL/inline-functions.hlsl  | 189 +++++++++++++-----
 .../CodeGenHLSL/this-assignment-overload.hlsl |   6 +-
 6 files changed, 174 insertions(+), 59 deletions(-)

diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 2ce54cc3c52ef..24acc9a559be2 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -345,6 +345,9 @@ void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes(
                 WaveSizeAttr->getPreferred());
     Fn->addFnAttr(WaveSizeKindStr, WaveSizeStr);
   }
+  if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
+    Fn->addFnAttr(llvm::Attribute::OptimizeNone);
+  }
   Fn->addFnAttr(llvm::Attribute::NoInline);
 }
 
@@ -446,6 +449,13 @@ void CGHLSLRuntime::setHLSLFunctionAttributes(const FunctionDecl *FD,
     const StringRef ExportAttrKindStr = "hlsl.export";
     Fn->addFnAttr(ExportAttrKindStr);
   }
+  llvm::Triple T(Fn->getParent()->getTargetTriple());
+  if (T.getEnvironment() == llvm::Triple::EnvironmentType::Library) {
+    if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
+      Fn->addFnAttr(llvm::Attribute::OptimizeNone);
+      Fn->addFnAttr(llvm::Attribute::NoInline);
+    }
+  }
 }
 
 static void gatherFunctions(SmallVectorImpl<Function *> &Fns, llvm::Module &M,
diff --git a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
index 09c44f6242c53..39d7c73e832a1 100644
--- a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O1 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
 
 // Make sure global variable for ctors exist for lib profile.
 // CHECK:@llvm.global_ctors
@@ -31,12 +31,12 @@ void SecondEntry() {}
 // CHECK: ret void
 
 
-// Verify the constructor is alwaysinline
-// NOINLINE: ; Function Attrs: {{.*}}alwaysinline
+// Verify the constructor is optnone
+// NOINLINE: ; Function Attrs: {{.*}} optnone
 // NOINLINE-NEXT: define linkonce_odr void @_ZN4hlsl8RWBufferIfEC2Ev({{.*}} [[CtorAttr:\#[0-9]+]]
 
 // NOINLINE: ; Function Attrs: {{.*}}alwaysinline
 // NOINLINE-NEXT: define internal void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() [[InitAttr:\#[0-9]+]]
 
 // NOINLINE-DAG: attributes [[InitAttr]] = {{.*}} alwaysinline
-// NOINLINE-DAG: attributes [[CtorAttr]] = {{.*}} alwaysinline
+// NOINLINE-DAG: attributes [[CtorAttr]] = {{.*}} optnone
diff --git a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
index f98318601134b..8961e1a7e59cd 100644
--- a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,NOINLINE,CHECK
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,NOINLINE,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O1 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O1 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
 
 // Tests that constructors and destructors are appropriately generated for globals
 // and that their calls are inlined when AlwaysInline is run
diff --git a/clang/test/CodeGenHLSL/inline-constructors.hlsl b/clang/test/CodeGenHLSL/inline-constructors.hlsl
index b0d5a783fb372..298d7d4272678 100644
--- a/clang/test/CodeGenHLSL/inline-constructors.hlsl
+++ b/clang/test/CodeGenHLSL/inline-constructors.hlsl
@@ -1,9 +1,9 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,NOINLINE_LIB
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
 
 // Tests that implicit constructor calls for user classes will always be inlined.
 
@@ -50,6 +50,10 @@ void NionsDay(int hours) {
 // NOINLINE-NEXT: call void @_GLOBAL__sub_I_inline_constructors.hlsl()
 // NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
 // NOINLINE-NEXT: call void @_Z4mainj(i32 %0)
+// NOINLINE_LIB: call void @_ZN4WeedC1Ev
+// NOINLINE_LIB-NEXT: call void @_ZN5KittyC1Ev
+// NOINLINE_LIB-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+// NOINLINE_LIB-NEXT: call void @_Z4mainj(i32 %0)
 // Verify inlining leaves only calls to "llvm." intrinsics
 // INLINE-NOT:    call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
 // CHECK:         ret void
diff --git a/clang/test/CodeGenHLSL/inline-functions.hlsl b/clang/test/CodeGenHLSL/inline-functions.hlsl
index e78d04ec9594f..4a7217e3a89e3 100644
--- a/clang/test/CodeGenHLSL/inline-functions.hlsl
+++ b/clang/test/CodeGenHLSL/inline-functions.hlsl
@@ -1,22 +1,35 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
-
-// Tests that user functions will always be inlined.
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,CHECK_LIB_OPTNONE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_LIB_OPTNONE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_OPT
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,CHECK_CS_OPTNONE_NOPASS
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_CS_OPTNONE_PASS
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_OPT
+
+// Tests inlining of user functions based on specified optimization options.
 // This includes exported functions and mangled entry point implementation functions.
-// The unmangled entry functions must not be alwaysinlined.
 
 #define MAX 100
 
 float nums[MAX];
 
-// Verify that all functions have the alwaysinline attribute
-// NOINLINE: Function Attrs: alwaysinline
-// NOINLINE: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[IntAttr:\#[0-9]+]]
-// NOINLINE: ret void
+// Check optnone attribute for library target compilation
+// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
+// CHECK_LIB_OPTNONE: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[ExtAttr:\#[0-9]+]]
+
+// Check alwaysinline attribute for non-entry functions of compute target compilation
+// CHECK_CS_OPTNONE_NOPASS: Function Attrs: alwaysinline
+// CHECK_CS_OPTNONE_NOPASS: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[ExtAttr:\#[0-9]+]]
+
+// Check alwaysinline attribute for non-entry functions of compute target compilation
+// CHECK_CS_OPTNONE_PASS: Function Attrs: alwaysinline
+// CHECK_CS_OPTNONE_PASS: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[ExtAttr:\#[0-9]+]]
+
+// Check alwaysinline attribute for opt compilation to library target
+// CHECK_OPT: Function Attrs: alwaysinline
+// CHECK_OPT: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 captures(none) %Buf, i32 noundef %ix1, i32 noundef %ix2) {{[a-z_ ]*}} [[SwapOptAttr:\#[0-9]+]]
+
+// CHECK: ret void
+
 // Swap the values of Buf at indices ix1 and ix2
 void swap(unsigned Buf[MAX], unsigned ix1, unsigned ix2) {
   float tmp = Buf[ix1];
@@ -24,9 +37,24 @@ void swap(unsigned Buf[MAX], unsigned ix1, unsigned ix2) {
   Buf[ix2] = tmp;
 }
 
-// NOINLINE: Function Attrs: alwaysinline
-// NOINLINE: define void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[IntAttr]]
-// NOINLINE: ret void
+// Check optnone attribute for library target compilation
+// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
+// CHECK_LIB_OPTNONE: define void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[ExtAttr]]
+
+// Check alwaysinline attribute for non-entry functions of compute target compilation
+// CHECK_CS_OPTNONE_NOPASS: Function Attrs: alwaysinline
+// CHECK_CS_OPTNONE_NOPASS: define void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[ExtAttr]]
+
+// Check alwaysinline attribute for non-entry functions of compute target compilation
+// CHECK_CS_OPTNONE_PASS: Function Attrs: alwaysinline
+// CHECK_CS_OPTNONE_PASS: define void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[ExtAttr]]
+
+// Check alwaysinline attribute for opt compilation to library target
+// CHECK_OPT: Function Attrs: alwaysinline
+// CHECK_OPT: define void @_Z10BubbleSortA100_jj(ptr noundef readonly byval([100 x i32]) align 4 captures(none) %Buf, i32 noundef %size) {{[a-z_ ]*}} [[BubOptAttr:\#[0-9]+]]
+
+// CHECK: ret void
+
 // Inefficiently sort Buf in place
 void BubbleSort(unsigned Buf[MAX], unsigned size) {
   bool swapped = true;
@@ -41,12 +69,26 @@ void BubbleSort(unsigned Buf[MAX], unsigned size) {
   }
 }
 
-// Note ExtAttr is the inlined export set of attribs
-// CHECK: Function Attrs: alwaysinline
-// CHECK: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) {{[a-z_ ]*}}[[ExtAttr:\#[0-9]+]]
-// CHECK: ret i32
+// Check optnone attribute for library target compilation of exported function
+// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
+// CHECK_LIB_OPTNONE: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) [[ExportAttr:\#[0-9]+]]
 // Sort Buf and remove any duplicate values
 // returns the number of values left
+
+// Check alwaysinline attribute for exported function of compute target compilation
+// CHECK_CS_OPTNONE_NOPASS: Function Attrs: alwaysinline
+// CHECK_CS_OPTNONE_NOPASS: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) [[ExportAttr:\#[0-9]+]]
+
+// Check alwaysinline attribute for exported function of compute target compilation
+// CHECK_CS_OPTNONE_PASS: Function Attrs: alwaysinline
+// CHECK_CS_OPTNONE_PASS: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) [[ExportAttr:\#[0-9]+]]
+
+// Check alwaysinline attribute for exported function of library target compilation
+// CHECK_OPT: Function Attrs: alwaysinline
+// CHECK_OPT: define noundef i32 @_Z11RemoveDupesA100_jj(ptr noundef byval([100 x i32]) align 4 captures(none) %Buf, i32 noundef %size) {{[a-z_ ]*}} [[RemOptAttr:\#[0-9]+]]
+
+// CHECK: ret i32
+
 export
 unsigned RemoveDupes(unsigned Buf[MAX], unsigned size) {
   BubbleSort(Buf, size);
@@ -63,19 +105,44 @@ unsigned RemoveDupes(unsigned Buf[MAX], unsigned size) {
 
 RWBuffer<unsigned> Indices;
 
-// The mangled version of main only remains without inlining
-// because it has internal linkage from the start
-// Note main functions get the norecurse attrib, which IntAttr reflects
-// NOINLINE: Function Attrs: alwaysinline
-// NOINLINE: define internal void @_Z4mainj(i32 noundef %GI) [[IntAttr]]
-// NOINLINE: ret void
+// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
+// Internal function attributes are the same as those of source function's
+// CHECK_LIB_OPTNONE: define internal void @_Z4mainj(i32 noundef %GI) [[ExtAttr]]
+// CHECK_LIB_OPTNONE: ret void
+
+// CHECK_CS_OPTNONE_NOPASS: Function Attrs: alwaysinline
+// Internal function attributes are different from those of source function's
+// CHECK_CS_OPTNONE_NOPASS: define internal void @_Z4mainj(i32 noundef %GI) [[ExtAttr]]
+// CHECK_CS_OPTNONE_NOPASS: ret void
+
+// Check internal function @_Z4mainj is not generated when LLVM passes enabled
+// CHECK_CS_OPTNONE_PASS-NOT: define internal void @_Z4mainj
+
+// Check internal function @_Z4mainj is not generated as it should be inlined
+// for opt builds
+// CHECK_OPT-NOT: define internal void @_Z4mainj
 
 // The unmangled version is not inlined, EntryAttr reflects that
-// CHECK: Function Attrs: {{.*}}noinline
-// CHECK: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
-// Make sure function calls are inlined when AlwaysInline is run
-// This only leaves calls to llvm. intrinsics
-// INLINE-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK_LIB_OPTNONE: Function Attrs: {{.*}}noinline
+// CHECK_LIB_OPTNONE: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
+// Make sure internal function is not inlined when optimization is disabled
+// CHECK_LIB_OPTNONE: call void @_Z4mainj
+
+// CHECK_CS_OPTNONE_NOPASS: Function Attrs:{{.*}}optnone
+// CHECK_CS_OPTNONE_NOPASS: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
+// Make sure internal function is not inlined when optimization is disabled
+// CHECK_CS_OPTNONE_NOPASS: call void @_Z4mainj
+
+// CHECK_CS_OPTNONE_PASS: Function Attrs: {{.*}}noinline
+// CHECK_CS_OPTNONE_PASS: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
+// Make sure internal function is inlined when LLVM passes are enabled
+// CHECK_CS_OPTNONE_PASS: _Z4mainj.exit:
+
+// CHECK_OPT: Function Attrs: {{.*}}noinline
+// CHECK_OPT: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
+// Make sure internal function is inlined as optimization is enabled
+// CHECK_OPT: _Z4mainj.exit:
+
 // CHECK: ret void
 
 [numthreads(1,1,1)]
@@ -90,19 +157,41 @@ void main(unsigned int GI : SV_GroupIndex) {
     tmpIndices[i] = Indices[i];
 }
 
-// The mangled version of main only remains without inlining
-// because it has internal linkage from the start
-// Note main functions get the norecurse attrib, which IntAttr reflects
-// NOINLINE: Function Attrs: alwaysinline
-// NOINLINE: define internal void @_Z6main10v() [[IntAttr]]
-// NOINLINE: ret void
+// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
+// CHECK_LIB_OPTNONE: define internal void @_Z6main10v() [[ExtAttr]]
+// CHECK_LIB_OPTNONE: ret void
+
+// CHECK_CS_OPTNONE_NOPASS: Function Attrs:{{.*}}alwaysinline
+// CHECK_CS_OPTNONE_NOPASS: define internal void @_Z6main10v() [[ExtAttr]]
+// CHECK_CS_OPTNONE_NOPASS: ret void
+
+// Check internal function @_Z6main10v is not generated when LLVM passes are enabled
+// CHECK_CS_OPTNONE_PASS-NOT: define internal void @_Z6main10v
+
+// Check internal function @_Z6main10v is not generated as it should be inlined
+// CHECK_OPT-NOT: define internal void @_Z6main10v
 
 // The unmangled version is not inlined, EntryAttr reflects that
-// CHECK: Function Attrs: {{.*}}noinline
-// CHECK: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
-// Make sure function calls are inlined when AlwaysInline is run
-// This only leaves calls to llvm. intrinsics
-// INLINE-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK_LIB_OPTNONE: Function Attrs: {{.*}}noinline
+// CHECK_LIB_OPTNONE: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
+// Make sure internal function is not inlined when optimization is disabled
+// CHECK_LIB_OPTNONE: call void @_Z6main10v
+
+// CHECK_CS_OPTNONE_NOPASS: Function Attrs: {{.*}}noinline
+// CHECK_CS_OPTNONE_NOPASS: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
+// Make sure internal function is not inlined when optimization is disabled
+// CHECK_CS_OPTNONE_NOPASS: call void @_Z6main10v
+
+// CHECK_CS_OPTNONE_PASS: Function Attrs: {{.*}}noinline
+// CHECK_CS_OPTNONE_PASS: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
+// Check internal function is inlined as optimization is enabled when LLVM passes
+// are enabled
+// CHECK_CS_OPTNONE_PASS: _Z6main10v.exit:
+
+// CHECK_OPT: Function Attrs: {{.*}}noinline
+// CHECK_OPT: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
+// Make sure internal function is inlined as optimization is enabled
+// CHECK_OPT: _Z6main10v.exit:
 // CHECK: ret void
 
 [numthreads(1,1,1)]
@@ -111,6 +200,16 @@ void main10() {
   main(10);
 }
 
-// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
-// CHECK: attributes [[ExtAttr]] = {{.*}} alwaysinline
-// CHECK: attributes [[EntryAttr]] = {{.*}} noinline
+// CHECK_LIB_OPTNONE: attributes [[ExtAttr]] = {{.*}} optnone
+// CHECK_LIB_OPTNONE: attributes [[ExportAttr]] = {{.*}} optnone
+
+// CHECK_CS_OPTNONE_NOPASS: attributes [[ExtAttr]] ={{.*}} alwaysinline
+// CHECK_CS_OPTNONE_NOPASS: attributes [[EntryAttr]] = {{.*}} noinline
+
+// CHECK_CS_OPTNONE_PASS: attributes [[ExtAttr]] ={{.*}} alwaysinline
+// CHECK_CS_OPTNONE_PASS: attributes [[EntryAttr]] = {{.*}} noinline
+
+// CHECK_OPT: attributes [[SwapOptAttr]] ={{.*}} alwaysinline
+// CHECK_OPT: attributes [[BubOptAttr]] ={{.*}} alwaysinline
+// CHECK_OPT: attributes [[RemOptAttr]] ={{.*}} alwaysinline
+// CHECK_OPT: attributes [[EntryAttr]] ={{.*}} noinline
diff --git a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
index 5a3bdc3d4d38e..5c0e2a71f94b8 100644
--- a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
+++ b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
@@ -25,7 +25,7 @@ void main() {
 }
 
 // This test makes a probably safe assumption that HLSL 202x includes operator overloading for assignment operators.
-// CHECK:     define linkonce_odr noundef i32 @_ZN4Pair8getFirstEv(ptr noundef nonnull align 4 dereferenceable(8) %this) #0 align 2 {
+// CHECK:     define linkonce_odr noundef i32 @_ZN4Pair8getFirstEv(ptr noundef nonnull align 4 dereferenceable(8) %this) [[Attr:\#[0-9]+]] align 2 {
 // CHECK-NEXT:entry:
 // CHECK-NEXT:%this.addr = alloca ptr, align 4
 // CHECK-NEXT:%Another = alloca %struct.Pair, align 4
@@ -42,7 +42,7 @@ void main() {
 // CHECK-NEXT:%0 = load i32, ptr %First2, align 4
 // CHECK-NEXT:ret i32 %0
 
-// CHECK:     define linkonce_odr noundef i32 @_ZN4Pair9getSecondEv(ptr noundef nonnull align 4 dereferenceable(8) %this) #0 align 2 {
+// CHECK:     define linkonce_odr noundef i32 @_ZN4Pair9getSecondEv(ptr noundef nonnull align 4 dereferenceable(8) %this) [[Attr]] align 2 {
 // CHECK-NEXT:entry:
 // CHECK-NEXT:%this.addr = alloca ptr, align 4
 // CHECK-NEXT:%agg.tmp = alloca %struct.Pair, align 4
@@ -53,3 +53,5 @@ void main() {
 // CHECK-NEXT:%Second = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 1
 // CHECK-NEXT:%0 = load i32, ptr %Second, align 4
 // CHECK-NEXT:ret i32 %0
+
+// CHECK: attributes [[Attr]] = {{.*}}alwaysinline

>From 00897b8e279c2baffaa2c6db2524ec1a61de46e0 Mon Sep 17 00:00:00 2001
From: "S. Bharadwaj Yadavalli" <Bharadwaj.Yadavalli at microsoft.com>
Date: Fri, 7 Feb 2025 18:03:19 -0500
Subject: [PATCH 2/4] [HLSL] Delete the change that sets non-entry functions of
 a library shader as optnone if optimization is disabled.

Update corresponding test changes
---
 clang/lib/CodeGen/CGHLSLRuntime.cpp           |   7 -
 .../CodeGenHLSL/GlobalConstructorLib.hlsl     |   8 +-
 clang/test/CodeGenHLSL/GlobalDestructors.hlsl |   4 +-
 .../test/CodeGenHLSL/inline-constructors.hlsl |  16 +-
 clang/test/CodeGenHLSL/inline-functions.hlsl  | 191 +++++-------------
 .../CodeGenHLSL/this-assignment-overload.hlsl |   6 +-
 6 files changed, 61 insertions(+), 171 deletions(-)

diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 24acc9a559be2..f83916746bfaf 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -449,13 +449,6 @@ void CGHLSLRuntime::setHLSLFunctionAttributes(const FunctionDecl *FD,
     const StringRef ExportAttrKindStr = "hlsl.export";
     Fn->addFnAttr(ExportAttrKindStr);
   }
-  llvm::Triple T(Fn->getParent()->getTargetTriple());
-  if (T.getEnvironment() == llvm::Triple::EnvironmentType::Library) {
-    if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
-      Fn->addFnAttr(llvm::Attribute::OptimizeNone);
-      Fn->addFnAttr(llvm::Attribute::NoInline);
-    }
-  }
 }
 
 static void gatherFunctions(SmallVectorImpl<Function *> &Fns, llvm::Module &M,
diff --git a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
index 39d7c73e832a1..09c44f6242c53 100644
--- a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O1 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
 
 // Make sure global variable for ctors exist for lib profile.
 // CHECK:@llvm.global_ctors
@@ -31,12 +31,12 @@ void SecondEntry() {}
 // CHECK: ret void
 
 
-// Verify the constructor is optnone
-// NOINLINE: ; Function Attrs: {{.*}} optnone
+// Verify the constructor is alwaysinline
+// NOINLINE: ; Function Attrs: {{.*}}alwaysinline
 // NOINLINE-NEXT: define linkonce_odr void @_ZN4hlsl8RWBufferIfEC2Ev({{.*}} [[CtorAttr:\#[0-9]+]]
 
 // NOINLINE: ; Function Attrs: {{.*}}alwaysinline
 // NOINLINE-NEXT: define internal void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() [[InitAttr:\#[0-9]+]]
 
 // NOINLINE-DAG: attributes [[InitAttr]] = {{.*}} alwaysinline
-// NOINLINE-DAG: attributes [[CtorAttr]] = {{.*}} optnone
+// NOINLINE-DAG: attributes [[CtorAttr]] = {{.*}} alwaysinline
diff --git a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
index 8961e1a7e59cd..f98318601134b 100644
--- a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,NOINLINE,CHECK
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,NOINLINE,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O1 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O1 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
 
 // Tests that constructors and destructors are appropriately generated for globals
 // and that their calls are inlined when AlwaysInline is run
diff --git a/clang/test/CodeGenHLSL/inline-constructors.hlsl b/clang/test/CodeGenHLSL/inline-constructors.hlsl
index 298d7d4272678..b0d5a783fb372 100644
--- a/clang/test/CodeGenHLSL/inline-constructors.hlsl
+++ b/clang/test/CodeGenHLSL/inline-constructors.hlsl
@@ -1,9 +1,9 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,NOINLINE_LIB
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
 
 // Tests that implicit constructor calls for user classes will always be inlined.
 
@@ -50,10 +50,6 @@ void NionsDay(int hours) {
 // NOINLINE-NEXT: call void @_GLOBAL__sub_I_inline_constructors.hlsl()
 // NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
 // NOINLINE-NEXT: call void @_Z4mainj(i32 %0)
-// NOINLINE_LIB: call void @_ZN4WeedC1Ev
-// NOINLINE_LIB-NEXT: call void @_ZN5KittyC1Ev
-// NOINLINE_LIB-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
-// NOINLINE_LIB-NEXT: call void @_Z4mainj(i32 %0)
 // Verify inlining leaves only calls to "llvm." intrinsics
 // INLINE-NOT:    call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
 // CHECK:         ret void
diff --git a/clang/test/CodeGenHLSL/inline-functions.hlsl b/clang/test/CodeGenHLSL/inline-functions.hlsl
index 4a7217e3a89e3..4748eeee7475f 100644
--- a/clang/test/CodeGenHLSL/inline-functions.hlsl
+++ b/clang/test/CodeGenHLSL/inline-functions.hlsl
@@ -1,35 +1,22 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,CHECK_LIB_OPTNONE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_LIB_OPTNONE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_OPT
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,CHECK_CS_OPTNONE_NOPASS
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_CS_OPTNONE_PASS
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_OPT
-
-// Tests inlining of user functions based on specified optimization options.
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE,OPT_ATTR
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE,OPT_ATTR
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE,NOOPT_ATTR
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE,OPT_ATTR
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE,NOOPT_ATTR
+
+// Tests that user functions will always be inlined.
 // This includes exported functions and mangled entry point implementation functions.
+// The unmangled entry functions must not be alwaysinlined.
 
 #define MAX 100
 
 float nums[MAX];
 
-// Check optnone attribute for library target compilation
-// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
-// CHECK_LIB_OPTNONE: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[ExtAttr:\#[0-9]+]]
-
-// Check alwaysinline attribute for non-entry functions of compute target compilation
-// CHECK_CS_OPTNONE_NOPASS: Function Attrs: alwaysinline
-// CHECK_CS_OPTNONE_NOPASS: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[ExtAttr:\#[0-9]+]]
-
-// Check alwaysinline attribute for non-entry functions of compute target compilation
-// CHECK_CS_OPTNONE_PASS: Function Attrs: alwaysinline
-// CHECK_CS_OPTNONE_PASS: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[ExtAttr:\#[0-9]+]]
-
-// Check alwaysinline attribute for opt compilation to library target
-// CHECK_OPT: Function Attrs: alwaysinline
-// CHECK_OPT: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 captures(none) %Buf, i32 noundef %ix1, i32 noundef %ix2) {{[a-z_ ]*}} [[SwapOptAttr:\#[0-9]+]]
-
-// CHECK: ret void
-
+// Verify that all functions have the alwaysinline attribute
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[IntAttr:\#[0-9]+]]
+// NOINLINE: ret void
 // Swap the values of Buf at indices ix1 and ix2
 void swap(unsigned Buf[MAX], unsigned ix1, unsigned ix2) {
   float tmp = Buf[ix1];
@@ -37,24 +24,9 @@ void swap(unsigned Buf[MAX], unsigned ix1, unsigned ix2) {
   Buf[ix2] = tmp;
 }
 
-// Check optnone attribute for library target compilation
-// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
-// CHECK_LIB_OPTNONE: define void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[ExtAttr]]
-
-// Check alwaysinline attribute for non-entry functions of compute target compilation
-// CHECK_CS_OPTNONE_NOPASS: Function Attrs: alwaysinline
-// CHECK_CS_OPTNONE_NOPASS: define void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[ExtAttr]]
-
-// Check alwaysinline attribute for non-entry functions of compute target compilation
-// CHECK_CS_OPTNONE_PASS: Function Attrs: alwaysinline
-// CHECK_CS_OPTNONE_PASS: define void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[ExtAttr]]
-
-// Check alwaysinline attribute for opt compilation to library target
-// CHECK_OPT: Function Attrs: alwaysinline
-// CHECK_OPT: define void @_Z10BubbleSortA100_jj(ptr noundef readonly byval([100 x i32]) align 4 captures(none) %Buf, i32 noundef %size) {{[a-z_ ]*}} [[BubOptAttr:\#[0-9]+]]
-
-// CHECK: ret void
-
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[IntAttr]]
+// NOINLINE: ret void
 // Inefficiently sort Buf in place
 void BubbleSort(unsigned Buf[MAX], unsigned size) {
   bool swapped = true;
@@ -69,26 +41,12 @@ void BubbleSort(unsigned Buf[MAX], unsigned size) {
   }
 }
 
-// Check optnone attribute for library target compilation of exported function
-// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
-// CHECK_LIB_OPTNONE: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) [[ExportAttr:\#[0-9]+]]
+// Note ExtAttr is the inlined export set of attribs
+// CHECK: Function Attrs: alwaysinline
+// CHECK: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) {{[a-z_ ]*}}[[ExtAttr:\#[0-9]+]]
+// CHECK: ret i32
 // Sort Buf and remove any duplicate values
 // returns the number of values left
-
-// Check alwaysinline attribute for exported function of compute target compilation
-// CHECK_CS_OPTNONE_NOPASS: Function Attrs: alwaysinline
-// CHECK_CS_OPTNONE_NOPASS: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) [[ExportAttr:\#[0-9]+]]
-
-// Check alwaysinline attribute for exported function of compute target compilation
-// CHECK_CS_OPTNONE_PASS: Function Attrs: alwaysinline
-// CHECK_CS_OPTNONE_PASS: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) [[ExportAttr:\#[0-9]+]]
-
-// Check alwaysinline attribute for exported function of library target compilation
-// CHECK_OPT: Function Attrs: alwaysinline
-// CHECK_OPT: define noundef i32 @_Z11RemoveDupesA100_jj(ptr noundef byval([100 x i32]) align 4 captures(none) %Buf, i32 noundef %size) {{[a-z_ ]*}} [[RemOptAttr:\#[0-9]+]]
-
-// CHECK: ret i32
-
 export
 unsigned RemoveDupes(unsigned Buf[MAX], unsigned size) {
   BubbleSort(Buf, size);
@@ -105,44 +63,20 @@ unsigned RemoveDupes(unsigned Buf[MAX], unsigned size) {
 
 RWBuffer<unsigned> Indices;
 
-// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
-// Internal function attributes are the same as those of source function's
-// CHECK_LIB_OPTNONE: define internal void @_Z4mainj(i32 noundef %GI) [[ExtAttr]]
-// CHECK_LIB_OPTNONE: ret void
-
-// CHECK_CS_OPTNONE_NOPASS: Function Attrs: alwaysinline
-// Internal function attributes are different from those of source function's
-// CHECK_CS_OPTNONE_NOPASS: define internal void @_Z4mainj(i32 noundef %GI) [[ExtAttr]]
-// CHECK_CS_OPTNONE_NOPASS: ret void
-
-// Check internal function @_Z4mainj is not generated when LLVM passes enabled
-// CHECK_CS_OPTNONE_PASS-NOT: define internal void @_Z4mainj
-
-// Check internal function @_Z4mainj is not generated as it should be inlined
-// for opt builds
-// CHECK_OPT-NOT: define internal void @_Z4mainj
+// The mangled version of main remains only when inlining is not run;
+// because it has internal linkage from the start, it is dropped once inlined
+// Note main functions get the norecurse attrib, which IntAttr reflects
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define internal void @_Z4mainj(i32 noundef %GI) [[IntAttr]]
+// NOINLINE: ret void
 
 // The unmangled version is not inlined, EntryAttr reflects that
-// CHECK_LIB_OPTNONE: Function Attrs: {{.*}}noinline
-// CHECK_LIB_OPTNONE: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
-// Make sure internal function is not inlined when optimization is disabled
-// CHECK_LIB_OPTNONE: call void @_Z4mainj
-
-// CHECK_CS_OPTNONE_NOPASS: Function Attrs:{{.*}}optnone
-// CHECK_CS_OPTNONE_NOPASS: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
-// Make sure internal function is not inlined when optimization is disabled
-// CHECK_CS_OPTNONE_NOPASS: call void @_Z4mainj
-
-// CHECK_CS_OPTNONE_PASS: Function Attrs: {{.*}}noinline
-// CHECK_CS_OPTNONE_PASS: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
-// Make sure internal function is inlined when LLVM passes are enabled
-// CHECK_CS_OPTNONE_PASS: _Z4mainj.exit:
-
-// CHECK_OPT: Function Attrs: {{.*}}noinline
-// CHECK_OPT: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
-// Make sure internal function is inlined as optimization is enabled
-// CHECK_OPT: _Z4mainj.exit:
-
+// OPT_ATTR: Function Attrs: {{.*}}optnone
+// NOOPT_ATTR-NOT: Function Attrs: {{.*}}optnone
+// CHECK: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
+// Make sure function calls are inlined when AlwaysInline is run
+// This only leaves calls to llvm. intrinsics
+// INLINE-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
 // CHECK: ret void
 
 [numthreads(1,1,1)]
@@ -157,41 +91,20 @@ void main(unsigned int GI : SV_GroupIndex) {
     tmpIndices[i] = Indices[i];
 }
 
-// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
-// CHECK_LIB_OPTNONE: define internal void @_Z6main10v() [[ExtAttr]]
-// CHECK_LIB_OPTNONE: ret void
-
-// CHECK_CS_OPTNONE_NOPASS: Function Attrs:{{.*}}alwaysinline
-// CHECK_CS_OPTNONE_NOPASS: define internal void @_Z6main10v() [[ExtAttr]]
-// CHECK_CS_OPTNONE_NOPASS: ret void
-
-// Check internal function @_Z6main10v is not generated when LLVM passes are enabled
-// CHECK_CS_OPTNONE_PASS-NOT: define internal void @_Z6main10v
-
-// Check internal function @_Z6main10v is not generated as it should be inlined
-// CHECK_OPT-NOT: define internal void @_Z6main10v
+// The mangled version of main remains only when inlining is not run;
+// because it has internal linkage from the start, it is dropped once inlined
+// Note main functions get the norecurse attrib, which IntAttr reflects
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define internal void @_Z6main10v() [[IntAttr]]
+// NOINLINE: ret void
 
 // The unmangled version is not inlined, EntryAttr reflects that
-// CHECK_LIB_OPTNONE: Function Attrs: {{.*}}noinline
-// CHECK_LIB_OPTNONE: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
-// Make sure internal function is not inlined when optimization is disabled
-// CHECK_LIB_OPTNONE: call void @_Z6main10v
-
-// CHECK_CS_OPTNONE_NOPASS: Function Attrs: {{.*}}noinline
-// CHECK_CS_OPTNONE_NOPASS: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
-// Make sure internal function is not inlined when optimization is disabled
-// CHECK_CS_OPTNONE_NOPASS: call void @_Z6main10v
-
-// CHECK_CS_OPTNONE_PASS: Function Attrs: {{.*}}noinline
-// CHECK_CS_OPTNONE_PASS: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
-// Check internal function is inlined as optimization is enabled when LLVM passes
-// are enabled
-// CHECK_CS_OPTNONE_PASS: _Z6main10v.exit:
-
-// CHECK_OPT: Function Attrs: {{.*}}noinline
-// CHECK_OPT: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
-// Make sure internal function is inlined as optimization is enabled
-// CHECK_OPT: _Z6main10v.exit:
+// OPT_ATTR: Function Attrs: {{.*}}optnone
+// NOOPT_ATTR-NOT: Function Attrs: {{.*}}optnone
+// CHECK: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
+// Make sure function calls are inlined when AlwaysInline is run
+// This only leaves calls to llvm. intrinsics
+// INLINE-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
 // CHECK: ret void
 
 [numthreads(1,1,1)]
@@ -200,16 +113,6 @@ void main10() {
   main(10);
 }
 
-// CHECK_LIB_OPTNONE: attributes [[ExtAttr]] = {{.*}} optnone
-// CHECK_LIB_OPTNONE: attributes [[ExportAttr]] = {{.*}} optnone
-
-// CHECK_CS_OPTNONE_NOPASS: attributes [[ExtAttr]] ={{.*}} alwaysinline
-// CHECK_CS_OPTNONE_NOPASS: attributes [[EntryAttr]] = {{.*}} noinline
-
-// CHECK_CS_OPTNONE_PASS: attributes [[ExtAttr]] ={{.*}} alwaysinline
-// CHECK_CS_OPTNONE_PASS: attributes [[EntryAttr]] = {{.*}} noinline
-
-// CHECK_OPT: attributes [[SwapOptAttr]] ={{.*}} alwaysinline
-// CHECK_OPT: attributes [[BubOptAttr]] ={{.*}} alwaysinline
-// CHECK_OPT: attributes [[RemOptAttr]] ={{.*}} alwaysinline
-// CHECK_OPT: attributes [[EntryAttr]] ={{.*}} noinline
+// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
+// CHECK: attributes [[ExtAttr]] = {{.*}} alwaysinline
+// CHECK: attributes [[EntryAttr]] = {{.*}} noinline
diff --git a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
index 5c0e2a71f94b8..5a3bdc3d4d38e 100644
--- a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
+++ b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
@@ -25,7 +25,7 @@ void main() {
 }
 
 // This test makes a probably safe assumption that HLSL 202x includes operator overloading for assignment operators.
-// CHECK:     define linkonce_odr noundef i32 @_ZN4Pair8getFirstEv(ptr noundef nonnull align 4 dereferenceable(8) %this) [[Attr:\#[0-9]+]] align 2 {
+// CHECK:     define linkonce_odr noundef i32 @_ZN4Pair8getFirstEv(ptr noundef nonnull align 4 dereferenceable(8) %this) #0 align 2 {
 // CHECK-NEXT:entry:
 // CHECK-NEXT:%this.addr = alloca ptr, align 4
 // CHECK-NEXT:%Another = alloca %struct.Pair, align 4
@@ -42,7 +42,7 @@ void main() {
 // CHECK-NEXT:%0 = load i32, ptr %First2, align 4
 // CHECK-NEXT:ret i32 %0
 
-// CHECK:     define linkonce_odr noundef i32 @_ZN4Pair9getSecondEv(ptr noundef nonnull align 4 dereferenceable(8) %this) [[Attr]] align 2 {
+// CHECK:     define linkonce_odr noundef i32 @_ZN4Pair9getSecondEv(ptr noundef nonnull align 4 dereferenceable(8) %this) #0 align 2 {
 // CHECK-NEXT:entry:
 // CHECK-NEXT:%this.addr = alloca ptr, align 4
 // CHECK-NEXT:%agg.tmp = alloca %struct.Pair, align 4
@@ -53,5 +53,3 @@ void main() {
 // CHECK-NEXT:%Second = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 1
 // CHECK-NEXT:%0 = load i32, ptr %Second, align 4
 // CHECK-NEXT:ret i32 %0
-
-// CHECK: attributes [[Attr]] = {{.*}}alwaysinline

>From 54b1e873d18a3efa6970b878ff3d1885f7b8929d Mon Sep 17 00:00:00 2001
From: "S. Bharadwaj Yadavalli" <Bharadwaj.Yadavalli at microsoft.com>
Date: Tue, 11 Feb 2025 10:26:36 -0500
Subject: [PATCH 3/4] Add comment

---
 clang/lib/CodeGen/CGHLSLRuntime.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index f83916746bfaf..193a8d038a8ee 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -345,6 +345,11 @@ void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes(
                 WaveSizeAttr->getPreferred());
     Fn->addFnAttr(WaveSizeKindStr, WaveSizeStr);
   }
+  // HLSL entry functions are materialized for module functions with
+  // HLSLShaderAttr attribute. SetLLVMFunctionAttributesForDefinition called
+  // later in the compiler-flow for such module functions is not aware of and
+  // hence not able to set attributes of the newly materialized entry functions.
+  // So, set attributes of entry function here, as appropriate.
   if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
     Fn->addFnAttr(llvm::Attribute::OptimizeNone);
   }

>From 37bcc29524f2f639f9788a87eb2b7bdaaf135abf Mon Sep 17 00:00:00 2001
From: "S. Bharadwaj Yadavalli" <Bharadwaj.Yadavalli at microsoft.com>
Date: Tue, 11 Feb 2025 11:24:56 -0500
Subject: [PATCH 4/4] Delete braces around simple single-statement body of if
 statement

---
 clang/lib/CodeGen/CGHLSLRuntime.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 193a8d038a8ee..6cccd353cff96 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -350,9 +350,8 @@ void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes(
   // later in the compiler-flow for such module functions is not aware of and
   // hence not able to set attributes of the newly materialized entry functions.
   // So, set attributes of entry function here, as appropriate.
-  if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
+  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
     Fn->addFnAttr(llvm::Attribute::OptimizeNone);
-  }
   Fn->addFnAttr(llvm::Attribute::NoInline);
 }
 



More information about the cfe-commits mailing list