[clang] [HLSL] Appropriately set function attribute optnone (PR #125937)
S. Bharadwaj Yadavalli via cfe-commits
cfe-commits at lists.llvm.org
Wed Feb 5 13:32:09 PST 2025
https://github.com/bharadwajy created https://github.com/llvm/llvm-project/pull/125937
When optimization is disabled, set optnone attribute
- for all module functions when targetting library shaders
- only for entry function when targetting non-library shaders
Updated tests in accordance with the change.
With this change, generation of non-standard DXIL metadata `dx.disable_optimizations` is planned to be eliminated (#124796) in a follow-on PR.
>From f04a746ae9f11c3aa73e22690ad46ba6c7df133d Mon Sep 17 00:00:00 2001
From: "S. Bharadwaj Yadavalli" <Bharadwaj.Yadavalli at microsoft.com>
Date: Tue, 4 Feb 2025 12:48:09 -0500
Subject: [PATCH] [HLSL] Set function optnone attribute appropriately
When optimization is disabled, set optnone attribute
- for all module functions when targetting Library shaders
- only for entry function when targetting non-Library shaders
Update tests in accordance with the change.
---
clang/lib/CodeGen/CodeGenModule.cpp | 11 +-
.../GlobalConstructorFunction.hlsl | 5 +-
.../CodeGenHLSL/GlobalConstructorLib.hlsl | 8 +-
clang/test/CodeGenHLSL/GlobalDestructors.hlsl | 4 +-
.../builtins/RWBuffer-subscript.hlsl | 3 +-
.../StructuredBuffers-subscripts.hlsl | 3 +-
.../implicit-norecurse-attrib.hlsl | 31 ++--
.../test/CodeGenHLSL/inline-constructors.hlsl | 14 +-
clang/test/CodeGenHLSL/inline-functions.hlsl | 158 +++++++++++++-----
.../CodeGenHLSL/this-assignment-overload.hlsl | 6 +-
10 files changed, 164 insertions(+), 79 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 02615bb13dfb8a7..42cb7d8634a24b2 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2545,7 +2545,16 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
// Non-entry HLSL functions must always be inlined.
if (getLangOpts().HLSL && !F->hasFnAttribute(llvm::Attribute::NoInline) &&
!D->hasAttr<NoInlineAttr>()) {
- B.addAttribute(llvm::Attribute::AlwaysInline);
+ // Set OptimizeNone for HLSL entry functions if ShouldAddOptNone
+ // or for all HLSL functions compiled for Library target.
+ llvm::Triple T(F->getParent()->getTargetTriple());
+ if (ShouldAddOptNone &&
+ (D->hasAttr<HLSLShaderAttr>() ||
+ T.getEnvironment() == llvm::Triple::EnvironmentType::Library)) {
+ B.addAttribute(llvm::Attribute::OptimizeNone);
+ B.addAttribute(llvm::Attribute::NoInline);
+ } else
+ B.addAttribute(llvm::Attribute::AlwaysInline);
} else if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
!F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
// Add optnone, but do so only if the function isn't always_inline.
diff --git a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
index c0eb1b138ed0475..e880ec93c634ed5 100644
--- a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
@@ -32,11 +32,10 @@ void main(unsigned GI : SV_GroupIndex) {}
// NOINLINE-NEXT: call void @_Z12call_me_lastv(
// NOINLINE-NEXT: ret void
-// Verify constructor calls are inlined when AlwaysInline is run
-// INLINE-NEXT: alloca
+// Verify constructor calls are inlined
// INLINE-NEXT: store i32 12
// INLINE-NEXT: store i32 13
// INLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
-// INLINE-NEXT: store i32 %
+// INLINE-NEXT: call void @_Z4mainj(i32 %0)
// INLINE-NEXT: store i32 0
// INLINE: ret void
diff --git a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
index 09c44f6242c53c7..39d7c73e832a104 100644
--- a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O1 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
// Make sure global variable for ctors exist for lib profile.
// CHECK:@llvm.global_ctors
@@ -31,12 +31,12 @@ void SecondEntry() {}
// CHECK: ret void
-// Verify the constructor is alwaysinline
-// NOINLINE: ; Function Attrs: {{.*}}alwaysinline
+// Verify the constructor is optnone
+// NOINLINE: ; Function Attrs: {{.*}} optnone
// NOINLINE-NEXT: define linkonce_odr void @_ZN4hlsl8RWBufferIfEC2Ev({{.*}} [[CtorAttr:\#[0-9]+]]
// NOINLINE: ; Function Attrs: {{.*}}alwaysinline
// NOINLINE-NEXT: define internal void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() [[InitAttr:\#[0-9]+]]
// NOINLINE-DAG: attributes [[InitAttr]] = {{.*}} alwaysinline
-// NOINLINE-DAG: attributes [[CtorAttr]] = {{.*}} alwaysinline
+// NOINLINE-DAG: attributes [[CtorAttr]] = {{.*}} optnone
diff --git a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
index f98318601134bb2..8961e1a7e59cd9c 100644
--- a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
@@ -1,7 +1,7 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,NOINLINE,CHECK
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,NOINLINE,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O1 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O1 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
// Tests that constructors and destructors are appropriately generated for globals
// and that their calls are inlined when AlwaysInline is run
diff --git a/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl b/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl
index 2ad5b82a029129f..9c9532da426c961 100644
--- a/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl
@@ -6,7 +6,8 @@ RWBuffer<int> Out;
[numthreads(1,1,1)]
void main(unsigned GI : SV_GroupIndex) {
- // CHECK: define void @main()
+ // DXC: define internal void @_Z4mainj(i32 noundef %GI)
+ // SPIRV: define internal spir_func void @_Z4mainj(i32 noundef %GI)
// DXC: %[[INPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}})
// SPIRV: %[[INPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_i32_5_2_0_0_2_0t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}})
diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffers-subscripts.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-subscripts.hlsl
index 2af7c3ed3219ffc..c3a8225720670fc 100644
--- a/clang/test/CodeGenHLSL/builtins/StructuredBuffers-subscripts.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-subscripts.hlsl
@@ -6,8 +6,7 @@ RasterizerOrderedStructuredBuffer<int> Out2;
[numthreads(1,1,1)]
void main(unsigned GI : SV_GroupIndex) {
- // CHECK: define void @main()
-
+ // CHECK: define internal void @_Z4mainj(i32 noundef %GI)
// CHECK: %[[INPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_0_0t(target("dx.RawBuffer", i32, 0, 0) %{{.*}}, i32 %{{.*}})
// CHECK: %[[LOAD:.*]] = load i32, ptr %[[INPTR]]
// CHECK: %[[OUT1PTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %{{.*}}, i32 %{{.*}})
diff --git a/clang/test/CodeGenHLSL/implicit-norecurse-attrib.hlsl b/clang/test/CodeGenHLSL/implicit-norecurse-attrib.hlsl
index a8ab6ce98ae7e96..415a78ceabd98b9 100644
--- a/clang/test/CodeGenHLSL/implicit-norecurse-attrib.hlsl
+++ b/clang/test/CodeGenHLSL/implicit-norecurse-attrib.hlsl
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -x hlsl -triple dxil-pc-shadermodel6.3-library -finclude-default-header %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
-// RUN: %clang_cc1 -x hlsl -triple dxil-pc-shadermodel6.0-compute -finclude-default-header %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+// RUN: %clang_cc1 -x hlsl -triple dxil-pc-shadermodel6.3-library -finclude-default-header %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,LIB
+// RUN: %clang_cc1 -x hlsl -triple dxil-pc-shadermodel6.0-compute -finclude-default-header %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,COMPUTE
// Verify that a few different function types all get the NoRecurse attribute
@@ -11,7 +11,8 @@ struct Node {
uint left, right;
};
-// CHECK: Function Attrs:{{.*}}norecurse
+// LIB: Function Attrs:{{.*}}norecurse{{.*}}optnone
+// COMPUTE: Function Attrs: alwaysinline{{.*}}norecurse
// CHECK: define noundef i32 @_Z4FindA100_4Nodej(ptr noundef byval([100 x %struct.Node]) align 4 %SortedTree, i32 noundef %key) [[IntAttr:\#[0-9]+]]
// CHECK: ret i32
// Find and return value corresponding to key in the SortedTree
@@ -30,7 +31,8 @@ uint Find(Node SortedTree[MAX], uint key) {
}
}
-// CHECK: Function Attrs:{{.*}}norecurse
+// LIB: Function Attrs:{{.*}}norecurse{{.*}}optnone
+// COMPUTE: Function Attrs: alwaysinline{{.*}}norecurse
// CHECK: define noundef i1 @_Z8InitTreeA100_4NodeN4hlsl8RWBufferIDv4_jEEj(ptr noundef byval([100 x %struct.Node]) align 4 %tree, ptr noundef byval(%"class.hlsl::RWBuffer") align 4 %encodedTree, i32 noundef %maxDepth) [[ExtAttr:\#[0-9]+]]
// CHECK: ret i1
// Initialize tree with given buffer
@@ -51,12 +53,13 @@ bool InitTree(/*inout*/ Node tree[MAX], RWBuffer<uint4> encodedTree, uint maxDep
RWBuffer<uint4> gTree;
// Mangled entry points are internal
-// CHECK: Function Attrs:{{.*}}norecurse
-// CHECK: define internal void @_Z4mainj(i32 noundef %GI) [[IntAttr]]
+// CHECK: Function Attrs:{{.*}}norecurse{{.*}}optnone
+// LIB: define internal void @_Z4mainj(i32 noundef %GI) [[IntAttr]]
+// COMPUTE: define internal void @_Z4mainj(i32 noundef %GI) [[IntComputeAttr:\#[0-9]]]
// CHECK: ret void
// Canonical entry points are external and shader attributed
-// CHECK: Function Attrs:{{.*}}norecurse
+// CHECK: Function Attrs: convergent noinline norecurse
// CHECK: define void @main() [[EntryAttr:\#[0-9]+]]
// CHECK: ret void
@@ -70,12 +73,13 @@ void main(uint GI : SV_GroupIndex) {
}
// Mangled entry points are internal
-// CHECK: Function Attrs:{{.*}}norecurse
-// CHECK: define internal void @_Z11defaultMainv() [[IntAttr]]
+// CHECK: Function Attrs:{{.*}}norecurse{{.*}}optnone
+// LIB: define internal void @_Z11defaultMainv() [[IntAttr]]
+// COMPUTE: define internal void @_Z11defaultMainv() [[IntComputeAttr]]
// CHECK: ret void
// Canonical entry points are external and shader attributed
-// CHECK: Function Attrs:{{.*}}norecurse
+// CHECK: Function Attrs: convergent noinline norecurse
// CHECK: define void @defaultMain() [[EntryAttr]]
// CHECK: ret void
@@ -88,6 +92,7 @@ void defaultMain() {
needle = Find(haystack, needle);
}
-// CHECK: attributes [[IntAttr]] = {{.*}} norecurse
-// CHECK: attributes [[ExtAttr]] = {{.*}} norecurse
-// CHECK: attributes [[EntryAttr]] = {{.*}} norecurse
+// CHECK: attributes [[IntAttr]] = {{.*}}norecurse
+// CHECK: attributes [[ExtAttr]] = {{.*}}norecurse
+// COMPUTE: attributes [[IntComputeAttr]] = {{.*}}norecurse
+// CHECK: attributes [[EntryAttr]] = {{.*}}norecurse
\ No newline at end of file
diff --git a/clang/test/CodeGenHLSL/inline-constructors.hlsl b/clang/test/CodeGenHLSL/inline-constructors.hlsl
index b0d5a783fb3725c..340146321509cbe 100644
--- a/clang/test/CodeGenHLSL/inline-constructors.hlsl
+++ b/clang/test/CodeGenHLSL/inline-constructors.hlsl
@@ -1,9 +1,9 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,NOINLINE_INTERNAL
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,NOINLINE_INTERNAL
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
// Tests that implicit constructor calls for user classes will always be inlined.
@@ -67,6 +67,8 @@ void main(unsigned GI : SV_GroupIndex) {
// NOINLINE-NEXT: call void @_Z9rainyMainv()
// Verify inlining leaves only calls to "llvm." intrinsics
// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// Verify internal function is not inlined
+// NOINLINE_INTERNAL: call void @_Z9rainyMainv()
// CHECK: ret void
[shader("compute")]
[numthreads(1,1,1)]
diff --git a/clang/test/CodeGenHLSL/inline-functions.hlsl b/clang/test/CodeGenHLSL/inline-functions.hlsl
index e78d04ec9594fac..25b89c7b0289625 100644
--- a/clang/test/CodeGenHLSL/inline-functions.hlsl
+++ b/clang/test/CodeGenHLSL/inline-functions.hlsl
@@ -1,22 +1,31 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
-
-// Tests that user functions will always be inlined.
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,CHECK_LIB_OPTNONE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_LIB_OPTNONE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_OPT
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,CHECK_CS_OPTNONE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_CS_OPTNONE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_OPT
+
+// Tests inlining of user functions based on specified optimization options.
// This includes exported functions and mangled entry point implementation functions.
-// The unmangled entry functions must not be alwaysinlined.
#define MAX 100
float nums[MAX];
-// Verify that all functions have the alwaysinline attribute
-// NOINLINE: Function Attrs: alwaysinline
-// NOINLINE: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[IntAttr:\#[0-9]+]]
-// NOINLINE: ret void
+// Check optnone attribute for library target compilation
+// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
+// CHECK_LIB_OPTNONE: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[ExtAttr:\#[0-9]+]]
+
+// Check alwaysinline attribute for non-entry functions of compute target compilation
+// CHECK_CS_OPTNONE: Function Attrs: alwaysinline
+// CHECK_CS_OPTNONE: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[ExtAttr:\#[0-9]+]]
+
+// Check alwaysinline attribute for opt compilation to library target
+// CHECK_OPT: Function Attrs: alwaysinline
+// CHECK_OPT: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 captures(none) %Buf, i32 noundef %ix1, i32 noundef %ix2) {{[a-z_ ]*}} [[SwapOptAttr:\#[0-9]+]]
+
+// CHECK: ret void
+
// Swap the values of Buf at indices ix1 and ix2
void swap(unsigned Buf[MAX], unsigned ix1, unsigned ix2) {
float tmp = Buf[ix1];
@@ -24,9 +33,20 @@ void swap(unsigned Buf[MAX], unsigned ix1, unsigned ix2) {
Buf[ix2] = tmp;
}
-// NOINLINE: Function Attrs: alwaysinline
-// NOINLINE: define void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[IntAttr]]
-// NOINLINE: ret void
+// Check optnone attribute for library target compilation
+// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
+// CHECK_LIB_OPTNONE: define void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[ExtAttr]]
+
+// Check alwaysinline attribute for non-entry functions of compute target compilation
+// CHECK_CS_OPTNONE: Function Attrs: alwaysinline
+// CHECK_CS_OPTNONE: define void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[ExtAttr]]
+
+// Check alwaysinline attribute for opt compilation to library target
+// CHECK_OPT: Function Attrs: alwaysinline
+// CHECK_OPT: define void @_Z10BubbleSortA100_jj(ptr noundef readonly byval([100 x i32]) align 4 captures(none) %Buf, i32 noundef %size) {{[a-z_ ]*}} [[BubOptAttr:\#[0-9]+]]
+
+// CHECK: ret void
+
// Inefficiently sort Buf in place
void BubbleSort(unsigned Buf[MAX], unsigned size) {
bool swapped = true;
@@ -41,12 +61,22 @@ void BubbleSort(unsigned Buf[MAX], unsigned size) {
}
}
-// Note ExtAttr is the inlined export set of attribs
-// CHECK: Function Attrs: alwaysinline
-// CHECK: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) {{[a-z_ ]*}}[[ExtAttr:\#[0-9]+]]
-// CHECK: ret i32
+// Check optnone attribute for library target compilation of exported function
+// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
+// CHECK_LIB_OPTNONE: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) [[ExportAttr:\#[0-9]+]]
// Sort Buf and remove any duplicate values
// returns the number of values left
+
+// Check alwaysinline attribute for exported function of compute target compilation
+// CHECK_CS_OPTNONE: Function Attrs: alwaysinline
+// CHECK_CS_OPTNONE: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) [[ExportAttr:\#[0-9]+]]
+
+// Check alwaysinline attribute for exported function of library target compilation
+// CHECK_OPT: Function Attrs: alwaysinline
+// CHECK_OPT: define noundef i32 @_Z11RemoveDupesA100_jj(ptr noundef byval([100 x i32]) align 4 captures(none) %Buf, i32 noundef %size) {{[a-z_ ]*}} [[RemOptAttr:\#[0-9]+]]
+
+// CHECK: ret i32
+
export
unsigned RemoveDupes(unsigned Buf[MAX], unsigned size) {
BubbleSort(Buf, size);
@@ -63,19 +93,36 @@ unsigned RemoveDupes(unsigned Buf[MAX], unsigned size) {
RWBuffer<unsigned> Indices;
-// The mangled version of main only remains without inlining
-// because it has internal linkage from the start
-// Note main functions get the norecurse attrib, which IntAttr reflects
-// NOINLINE: Function Attrs: alwaysinline
-// NOINLINE: define internal void @_Z4mainj(i32 noundef %GI) [[IntAttr]]
-// NOINLINE: ret void
+// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
+// Internal function attributes are the same as those of source function's
+// CHECK_LIB_OPTNONE: define internal void @_Z4mainj(i32 noundef %GI) [[ExtAttr]]
+// CHECK_LIB_OPTNONE: ret void
+
+// CHECK_CS_OPTNONE: Function Attrs:{{.*}}optnone
+// Internal function attributes are different from those of source function's
+// CHECK_CS_OPTNONE: define internal void @_Z4mainj(i32 noundef %GI) [[IntAttr:\#[0-9]+]]
+// CHECK_CS_OPTNONE: ret void
+
+// Check internal function @_Z4mainj is not generated as it should be inlined
+// for opt builds
+// CHECK_OPT-NOT: define internal void @_Z4mainj
// The unmangled version is not inlined, EntryAttr reflects that
-// CHECK: Function Attrs: {{.*}}noinline
-// CHECK: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
-// Make sure function calls are inlined when AlwaysInline is run
-// This only leaves calls to llvm. intrinsics
-// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK_LIB_OPTNONE: Function Attrs: {{.*}}noinline
+// CHECK_LIB_OPTNONE: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
+// Make sure internal function is not inlined when optimization is disabled
+// CHECK_LIB_OPTNONE: call void @_Z4mainj
+
+// CHECK_CS_OPTNONE: Function Attrs: {{.*}}noinline
+// CHECK_CS_OPTNONE: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
+// Make sure internal function is not inlined when optimization is disabled
+// CHECK_CS_OPTNONE: call void @_Z4mainj
+
+// CHECK_OPT: Function Attrs: {{.*}}noinline
+// CHECK_OPT: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
+// Make sure internal function is inlined as optimization is enabled
+// CHECK_OPT: _Z4mainj.exit:
+
// CHECK: ret void
[numthreads(1,1,1)]
@@ -90,19 +137,32 @@ void main(unsigned int GI : SV_GroupIndex) {
tmpIndices[i] = Indices[i];
}
-// The mangled version of main only remains without inlining
-// because it has internal linkage from the start
-// Note main functions get the norecurse attrib, which IntAttr reflects
-// NOINLINE: Function Attrs: alwaysinline
-// NOINLINE: define internal void @_Z6main10v() [[IntAttr]]
-// NOINLINE: ret void
+// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
+// CHECK_LIB_OPTNONE: define internal void @_Z6main10v() [[ExtAttr]]
+// CHECK_LIB_OPTNONE: ret void
+
+// CHECK_CS_OPTNONE: Function Attrs:{{.*}}optnone
+// CHECK_CS_OPTNONE: define internal void @_Z6main10v() [[IntAttr]]
+// CHECK_CS_OPTNONE: ret void
+
+// Check internal function @_Z6main10v is not generated as it should be inlined
+// CHECK_OPT-NOT: define internal void @_Z6main10v
// The unmangled version is not inlined, EntryAttr reflects that
-// CHECK: Function Attrs: {{.*}}noinline
-// CHECK: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
-// Make sure function calls are inlined when AlwaysInline is run
-// This only leaves calls to llvm. intrinsics
-// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK_LIB_OPTNONE: Function Attrs: {{.*}}noinline
+// CHECK_LIB_OPTNONE: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
+// Make sure internal function is not inlined when optimization is disabled
+// CHECK_LIB_OPTNONE: call void @_Z6main10v
+
+// CHECK_CS_OPTNONE: Function Attrs: {{.*}}noinline
+// CHECK_CS_OPTNONE: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
+// Make sure internal function is not inlined when optimization is disabled
+// CHECK_CS_OPTNONE: call void @_Z6main10v
+
+// CHECK_OPT: Function Attrs: {{.*}}noinline
+// CHECK_OPT: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
+// Make sure internal function is inlined as optimization is enabled
+// CHECK_OPT: _Z6main10v.exit:
// CHECK: ret void
[numthreads(1,1,1)]
@@ -111,6 +171,14 @@ void main10() {
main(10);
}
-// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
-// CHECK: attributes [[ExtAttr]] = {{.*}} alwaysinline
-// CHECK: attributes [[EntryAttr]] = {{.*}} noinline
+// CHECK_LIB_OPTNONE: attributes [[ExtAttr]] = {{.*}} optnone
+// CHECK_LIB_OPTNONE: attributes [[ExportAttr]] = {{.*}} optnone
+
+// CHECK_CS_OPTNONE: attributes [[ExtAttr]] ={{.*}} alwaysinline
+// CHECK_CS_OPTNONE: attributes [[IntAttr]] = {{.*}} optnone
+// CHECK_CS_OPTNONE: attributes [[EntryAttr]] = {{.*}} noinline
+
+// CHECK_OPT: attributes [[SwapOptAttr]] ={{.*}} alwaysinline
+// CHECK_OPT: attributes [[BubOptAttr]] ={{.*}} alwaysinline
+// CHECK_OPT: attributes [[RemOptAttr]] ={{.*}} alwaysinline
+// CHECK_OPT: attributes [[EntryAttr]] ={{.*}} noinline
diff --git a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
index 5a3bdc3d4d38ee8..5c0e2a71f94b82b 100644
--- a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
+++ b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
@@ -25,7 +25,7 @@ void main() {
}
// This test makes a probably safe assumption that HLSL 202x includes operator overloading for assignment operators.
-// CHECK: define linkonce_odr noundef i32 @_ZN4Pair8getFirstEv(ptr noundef nonnull align 4 dereferenceable(8) %this) #0 align 2 {
+// CHECK: define linkonce_odr noundef i32 @_ZN4Pair8getFirstEv(ptr noundef nonnull align 4 dereferenceable(8) %this) [[Attr:\#[0-9]+]] align 2 {
// CHECK-NEXT:entry:
// CHECK-NEXT:%this.addr = alloca ptr, align 4
// CHECK-NEXT:%Another = alloca %struct.Pair, align 4
@@ -42,7 +42,7 @@ void main() {
// CHECK-NEXT:%0 = load i32, ptr %First2, align 4
// CHECK-NEXT:ret i32 %0
-// CHECK: define linkonce_odr noundef i32 @_ZN4Pair9getSecondEv(ptr noundef nonnull align 4 dereferenceable(8) %this) #0 align 2 {
+// CHECK: define linkonce_odr noundef i32 @_ZN4Pair9getSecondEv(ptr noundef nonnull align 4 dereferenceable(8) %this) [[Attr]] align 2 {
// CHECK-NEXT:entry:
// CHECK-NEXT:%this.addr = alloca ptr, align 4
// CHECK-NEXT:%agg.tmp = alloca %struct.Pair, align 4
@@ -53,3 +53,5 @@ void main() {
// CHECK-NEXT:%Second = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 1
// CHECK-NEXT:%0 = load i32, ptr %Second, align 4
// CHECK-NEXT:ret i32 %0
+
+// CHECK: attributes [[Attr]] = {{.*}}alwaysinline
More information about the cfe-commits
mailing list