[clang] [HLSL] set alwaysinline on HLSL functions (PR #106588)

Greg Roth via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 10 19:36:32 PDT 2024


https://github.com/pow2clk updated https://github.com/llvm/llvm-project/pull/106588

>From 12253818bd47aa8c324f6222586965f356b11c90 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth at microsoft.com>
Date: Wed, 24 Jul 2024 16:49:19 -0600
Subject: [PATCH 01/10] [HLSL] set alwaysinline on HLSL functions

HLSL inlines all its functions by default. This uses the alwaysinline
attribute to force that in the corresponding pass for user functions
by default and overrides the default noinline of some implicit functions.
This makes an instance of explicit inlining for buffer subscripts unnecessary.

Adds tests for function and constructor inlining and augments some existing
tests to verify correct inlining of implicitly created functions as well.

Incidentally restores a RUN line that I believe was mistakenly removed as part of #88918.

fixes #89282
---
 clang/lib/CodeGen/CGHLSLRuntime.cpp           |  17 ++-
 clang/lib/CodeGen/CodeGenFunction.cpp         |   4 +-
 clang/lib/Sema/HLSLExternalSemaSource.cpp     |   2 -
 .../GlobalConstructorFunction.hlsl            |  31 +++--
 .../CodeGenHLSL/GlobalConstructorLib.hlsl     |  23 +++-
 clang/test/CodeGenHLSL/GlobalDestructors.hlsl |  51 +++++---
 .../builtins/RWBuffer-constructor.hlsl        |   1 +
 .../builtins/RWBuffer-subscript.hlsl          |   5 +-
 .../test/CodeGenHLSL/inline-constructors.hlsl |  74 ++++++++++++
 clang/test/CodeGenHLSL/inline-functions.hlsl  | 114 ++++++++++++++++++
 10 files changed, 279 insertions(+), 43 deletions(-)
 create mode 100644 clang/test/CodeGenHLSL/inline-constructors.hlsl
 create mode 100644 clang/test/CodeGenHLSL/inline-functions.hlsl

diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 4bd7b6ba58de0d..24d126ced0d9f7 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -414,9 +414,20 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD,
 
 void CGHLSLRuntime::setHLSLFunctionAttributes(const FunctionDecl *FD,
                                               llvm::Function *Fn) {
-  if (FD->isInExportDeclContext()) {
-    const StringRef ExportAttrKindStr = "hlsl.export";
-    Fn->addFnAttr(ExportAttrKindStr);
+  if (FD) { // "explicit" functions with declarations
+    if (FD->isInExportDeclContext()) {
+      const StringRef ExportAttrKindStr = "hlsl.export";
+      Fn->addFnAttr(ExportAttrKindStr);
+    }
+    // Respect noinline if the explicit functions use it
+    // otherwise default to alwaysinline
+    if (!Fn->hasFnAttribute(Attribute::NoInline))
+      Fn->addFnAttr(llvm::Attribute::AlwaysInline);
+  } else { // "implicit" autogenerated functions with no declaration
+    // Implicit functions might get marked as noinline by default
+    // but we override that for HLSL
+    Fn->removeFnAttr(Attribute::NoInline);
+    Fn->addFnAttr(Attribute::AlwaysInline);
   }
 }
 
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index a5747283e98058..aceeed0e66d130 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1239,9 +1239,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
   if (getLangOpts().OpenMP && CurCodeDecl)
     CGM.getOpenMPRuntime().emitFunctionProlog(*this, CurCodeDecl);
 
-  if (FD && getLangOpts().HLSL) {
+  if (getLangOpts().HLSL) {
     // Handle emitting HLSL entry functions.
-    if (FD->hasAttr<HLSLShaderAttr>()) {
+    if (FD && FD->hasAttr<HLSLShaderAttr>()) {
       CGM.getHLSLRuntime().emitEntryFunction(FD, Fn);
     }
     CGM.getHLSLRuntime().setHLSLFunctionAttributes(FD, Fn);
diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp
index 9aacbe4ad9548e..0a534d94192560 100644
--- a/clang/lib/Sema/HLSLExternalSemaSource.cpp
+++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp
@@ -290,8 +290,6 @@ struct BuiltinTypeDeclBuilder {
                                              SourceLocation()));
     MethodDecl->setLexicalDeclContext(Record);
     MethodDecl->setAccess(AccessSpecifier::AS_public);
-    MethodDecl->addAttr(AlwaysInlineAttr::CreateImplicit(
-        AST, SourceRange(), AlwaysInlineAttr::CXX11_clang_always_inline));
     Record->addDecl(MethodDecl);
 
     return *this;
diff --git a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
index f954c9d2f029f2..b39311ad67cd62 100644
--- a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
 
 int i;
 
@@ -7,7 +8,7 @@ __attribute__((constructor)) void call_me_first(void) {
 }
 
 __attribute__((constructor)) void then_call_me(void) {
-  i = 12;
+  i = 13;
 }
 
 __attribute__((destructor)) void call_me_last(void) {
@@ -21,11 +22,21 @@ void main(unsigned GI : SV_GroupIndex) {}
 // CHECK-NOT:@llvm.global_ctors
 // CHECK-NOT:@llvm.global_dtors
 
-//CHECK: define void @main()
-//CHECK-NEXT: entry:
-//CHECK-NEXT:   call void @"?call_me_first@@YAXXZ"()
-//CHECK-NEXT:   call void @"?then_call_me@@YAXXZ"()
-//CHECK-NEXT:   %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
-//CHECK-NEXT:   call void @"?main@@YAXI@Z"(i32 %0)
-//CHECK-NEXT:   call void @"?call_me_last@@YAXXZ"(
-//CHECK-NEXT:   ret void
+// CHECK: define void @main()
+// CHECK-NEXT: entry:
+// Verify function constructors are emitted
+// NOINLINE-NEXT:   call void @"?call_me_first@@YAXXZ"()
+// NOINLINE-NEXT:   call void @"?then_call_me@@YAXXZ"()
+// NOINLINE-NEXT:   %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+// NOINLINE-NEXT:   call void @"?main@@YAXI@Z"(i32 %0)
+// NOINLINE-NEXT:   call void @"?call_me_last@@YAXXZ"(
+// NOINLINE-NEXT:   ret void
+
+// Verify constructor calls are inlined when AlwaysInline is run
+// INLINE-NEXT:   alloca
+// INLINE-NEXT:   store i32 12
+// INLINE-NEXT:   store i32 13
+// INLINE-NEXT:   %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+// INLINE-NEXT:   store i32 %
+// INLINE-NEXT:   store i32 0
+// INLINE:   ret void
diff --git a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
index 2c5c4e19c3296d..78f6475462bc47 100644
--- a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
 
 // Make sure global variable for ctors exist for lib profile.
 // CHECK:@llvm.global_ctors
@@ -11,7 +12,11 @@ void FirstEntry() {}
 
 // CHECK: define void @FirstEntry()
 // CHECK-NEXT: entry:
-// CHECK-NEXT:   call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
+// NOINLINE-NEXT:   call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
+// NOINLINE-NEXT:   call void @"?FirstEntry@@YAXXZ"()
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
 
 [shader("compute")]
 [numthreads(1,1,1)]
@@ -19,5 +24,15 @@ void SecondEntry() {}
 
 // CHECK: define void @SecondEntry()
 // CHECK-NEXT: entry:
-// CHECK-NEXT:   call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
-// CHECK-NEXT:   call void @"?SecondEntry@@YAXXZ"()
+// NOINLINE-NEXT:   call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
+// NOINLINE-NEXT:   call void @"?SecondEntry@@YAXXZ"()
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
+
+
+// Verify the constructor is alwaysinline
+// NOINLINE: ; Function Attrs: {{.*}}alwaysinline
+// NOINLINE-NEXT: define internal void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() [[IntAttr:\#[0-9]+]]
+
+// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
diff --git a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
index 24c3c039fc6192..ea28354222f885 100644
--- a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
@@ -1,10 +1,18 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,NOINLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,NOINLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
 
-// Make sure global variable for dtors exist for lib profile.
+// Tests that constructors and destructors are appropriately generated for globals
+// and that their calls are inlined when AlwaysInline is run
+// but global variables are retained for the library profiles
+
+// Make sure global variable for ctors/dtors exist for lib profile.
+// LIB:@llvm.global_ctors
 // LIB:@llvm.global_dtors
-// Make sure global variable for dtors removed for compute profile.
-// CS-NOT:llvm.global_dtors
+// Make sure global variable for ctors/dtors removed for compute profile.
+// CS-NOT:@llvm.global_ctors
+// CS-NOT:@llvm.global_dtors
 
 struct Tail {
   Tail() {
@@ -46,22 +54,25 @@ void main(unsigned GI : SV_GroupIndex) {
   Wag();
 }
 
-// Make sure global variable for ctors/dtors removed.
-// CHECK-NOT:@llvm.global_ctors
-// CHECK-NOT:@llvm.global_dtors
-//CHECK:      define void @main()
-//CHECK-NEXT: entry:
-//CHECK-NEXT:   call void @_GLOBAL__sub_I_GlobalDestructors.hlsl()
-//CHECK-NEXT:   %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
-//CHECK-NEXT:   call void @"?main@@YAXI@Z"(i32 %0)
-//CHECK-NEXT:   call void @_GLOBAL__D_a()
-//CHECK-NEXT:   ret void
+// CHECK:      define void @main()
+// CHECK-NEXT: entry:
+// Verify destructor is emitted
+// NOINLINE-NEXT:   call void @_GLOBAL__sub_I_GlobalDestructors.hlsl()
+// NOINLINE-NEXT:   %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+// NOINLINE-NEXT:   call void @"?main@@YAXI@Z"(i32 %0)
+// NOINLINE-NEXT:   call void @_GLOBAL__D_a()
+// NOINLINE-NEXT:   ret void
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// INLINE:   ret void
 
 // This is really just a sanity check I needed for myself to verify that
 // function scope static variables also get destroyed properly.
 
-//CHECK: define internal void @_GLOBAL__D_a()
-//CHECK-NEXT: entry:
-//CHECK-NEXT:   call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A")
-//CHECK-NEXT:   call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A")
-//CHECK-NEXT:   ret void
+// NOINLINE: define internal void @_GLOBAL__D_a() [[IntAttr:\#[0-9]+]]
+// NOINLINE-NEXT: entry:
+// NOINLINE-NEXT:   call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A")
+// NOINLINE-NEXT:   call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A")
+// NOINLINE-NEXT:   ret void
+
+// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
diff --git a/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl b/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
index baddfcf2cf1d52..174f4c3eaaad26 100644
--- a/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
@@ -1,3 +1,4 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=CHECK-SPIRV
 
 RWBuffer<float> Buf;
diff --git a/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl b/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl
index da8a1e538ec5e7..2a350c1619bd6e 100644
--- a/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl
@@ -11,6 +11,7 @@ void main(unsigned GI : SV_GroupIndex) {
 // Even at -O0 the subscript operators get inlined. The -O0 IR is a bit messy
 // and confusing to follow so the match here is pretty weak.
 
-// CHECK: define internal void @"?main@@YAXI@Z"
-// CHECK-NOT: call
+// CHECK: define void @main()
+// Verify inlining leaves only calls to "llvm." intrinsics
+// CHECK-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
 // CHECK: ret void
diff --git a/clang/test/CodeGenHLSL/inline-constructors.hlsl b/clang/test/CodeGenHLSL/inline-constructors.hlsl
new file mode 100644
index 00000000000000..40c7a42bdc262e
--- /dev/null
+++ b/clang/test/CodeGenHLSL/inline-constructors.hlsl
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+
+// Tests that implicit constructor calls for user classes will always be inlined.
+
+struct Weed {
+  Weed() {Count += 1;}
+  [[maybe_unused]] void pull() {Count--;}
+  static int weedCount() { return Count; }
+private:
+  static int Count;
+
+} YardWeeds;
+
+int Weed::Count = 1; // It begins. . .
+
+struct Kitty {
+  unsigned burrsInFur;
+
+  Kitty() {
+    burrsInFur = 0;
+  }
+
+  void wanderInYard(int hours) {
+    burrsInFur = hours*Weed::weedCount()/8;
+  }
+
+  void lick() {
+    if(burrsInFur) {
+      burrsInFur--;
+      Weed w;
+    }
+  }
+
+} Nion;
+
+void NionsDay(int hours) {
+  static Kitty Nion;
+  Nion.wanderInYard(hours);
+  while(Nion.burrsInFur) Nion.lick();
+}
+
+// CHECK:      define void @main()
+// CHECK-NEXT: entry:
+// Verify constructor is emitted
+// NOINLINE-NEXT: call void @_GLOBAL__sub_I_inline_constructors.hlsl()
+// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+// NOINLINE-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT:    call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK:         ret void
+[shader("compute")]
+[numthreads(1,1,1)]
+void main(unsigned GI : SV_GroupIndex) {
+  NionsDay(10);
+}
+
+
+// CHECK:      define void @rainyMain()
+// CHECK-NEXT: entry:
+// Verify constructor is emitted
+// NOINLINE-NEXT:   call void @_GLOBAL__sub_I_inline_constructors.hlsl()
+// NOINLINE-NEXT:   call void @"?rainyMain@@YAXXZ"()
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT:      call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK:           ret void
+[shader("compute")]
+[numthreads(1,1,1)]
+void rainyMain() {
+  NionsDay(1);
+}
+
diff --git a/clang/test/CodeGenHLSL/inline-functions.hlsl b/clang/test/CodeGenHLSL/inline-functions.hlsl
new file mode 100644
index 00000000000000..9c00e096a9eef2
--- /dev/null
+++ b/clang/test/CodeGenHLSL/inline-functions.hlsl
@@ -0,0 +1,114 @@
+// RUN: %clang_cc1 -x hlsl -triple  dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -x hlsl -triple  dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -x hlsl -triple  dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -x hlsl -triple  dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+
+// Tests that user functions will always be inlined.
+// This includes exported functions and mangled entry point implementation functions.
+// The unmangled entry functions must not be alwaysinlined.
+
+#define MAX 100
+
+float nums[MAX];
+
+// Verify that all functions have the alwaysinline attribute
+// CHECK: Function Attrs: alwaysinline
+// CHECK: define void @"?swap@@YAXY0GE@III@Z"(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[IntAttr:\#[0-9]+]]
+// CHECK: ret void
+// Swap the values of Buf at indices ix1 and ix2
+void swap(unsigned Buf[MAX], unsigned ix1, unsigned ix2) {
+  float tmp = Buf[ix1];
+  Buf[ix1] = Buf[ix2];
+  Buf[ix2] = tmp;
+}
+
+// CHECK: Function Attrs: alwaysinline
+// CHECK: define void @"?BubbleSort@@YAXY0GE@II@Z"(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[IntAttr]]
+// CHECK: ret void
+// Inefficiently sort Buf in place
+void BubbleSort(unsigned Buf[MAX], unsigned size) {
+  bool swapped = true;
+  while (swapped) {
+    swapped = false;
+    for (unsigned i = 1; i < size; i++) {
+      if (Buf[i] < Buf[i-1]) {
+	swap(Buf, i, i-1);
+	swapped = true;
+      }
+    }
+  }
+}
+
+// Note ExtAttr is the inlined export set of attribs
+// CHECK: Function Attrs: alwaysinline
+// CHECK: define noundef i32 @"?RemoveDupes@@YAIY0GE@II@Z"(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[ExtAttr:\#[0-9]+]]
+// CHECK: ret i32
+// Sort Buf and remove any duplicate values
+// returns the number of values left
+export
+unsigned RemoveDupes(unsigned Buf[MAX], unsigned size) {
+  BubbleSort(Buf, size);
+  unsigned insertPt = 0;
+  for (unsigned i = 1; i < size; i++) {
+    if (Buf[i] == Buf[i-1])
+      insertPt++;
+    else
+      Buf[insertPt] = Buf[i];
+  }
+  return insertPt;
+}
+
+
+RWBuffer<unsigned> Indices;
+
+// The mangled version of main only remains without inlining
+// because it has internal linkage from the start
+// Note main functions get the norecurse attrib, which IntAttr reflects
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define internal void @"?main@@YAXI@Z"(i32 noundef %GI) [[IntAttr]]
+// NOINLINE: ret void
+
+// The unmangled version is not inlined, EntryAttr reflects that
+// CHECK: Function Attrs: convergent norecurse
+// CHECK: define void @main() [[EntryAttr:\#[0-9]+]]
+// Make sure function calls are inlined when AlwaysInline is run
+// This only leaves calls to llvm. intrinsics
+// INLINE-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
+
+[numthreads(1,1,1)]
+[shader("compute")]
+void main(unsigned int GI : SV_GroupIndex) {
+  unsigned tmpIndices[MAX];
+  if (GI > MAX) return;
+  for (unsigned i = 1; i < GI; i++)
+    tmpIndices[i] = Indices[i];
+  RemoveDupes(tmpIndices, GI);
+  for (unsigned i = 1; i < GI; i++)
+    tmpIndices[i] = Indices[i];
+}
+
+// The mangled version of main only remains without inlining
+// because it has internal linkage from the start
+// Note main functions get the norecurse attrib, which IntAttr reflects
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define internal void @"?main10@@YAXXZ"() [[IntAttr]]
+// NOINLINE: ret void
+
+// The unmangled version is not inlined, EntryAttr reflects that
+// CHECK: Function Attrs: convergent norecurse
+// CHECK: define void @main10() [[EntryAttr]]
+// Make sure function calls are inlined when AlwaysInline is run
+// This only leaves calls to llvm. intrinsics
+// INLINE-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
+
+[numthreads(1,1,1)]
+[shader("compute")]
+void main10() {
+  main(10);
+}
+
+// CHECK: attributes [[IntAttr]] = {{.*}} alwaysinline
+// CHECK: attributes [[ExtAttr]] = {{.*}} alwaysinline
+// CHECK-NOT: attributes [[EntryAttr]] = {{.*}} alwaysinline

>From b7e367a51f61c511bca45eed28e2ea0701c3e4f3 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth at microsoft.com>
Date: Thu, 29 Aug 2024 12:59:03 -0600
Subject: [PATCH 02/10] update RWBuffer-AST test for later alwaysinline marking

Previously, the alwaysinline attribute was set on the RWBuffer
subscript operators as soon as they were created. Now that alwaysinline
is applied to all functions in one common place, that early setting is
redundant; removing it means the attribute no longer shows up in the AST.
---
 clang/test/AST/HLSL/RWBuffer-AST.hlsl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/clang/test/AST/HLSL/RWBuffer-AST.hlsl b/clang/test/AST/HLSL/RWBuffer-AST.hlsl
index 1f6ef60e121ea5..dd0208d50a05b1 100644
--- a/clang/test/AST/HLSL/RWBuffer-AST.hlsl
+++ b/clang/test/AST/HLSL/RWBuffer-AST.hlsl
@@ -42,7 +42,6 @@ RWBuffer<float> Buffer;
 // CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type *' lvalue .h 0x{{[0-9A-Fa-f]+}}
 // CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'const RWBuffer<element_type>' lvalue implicit this
 // CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'unsigned int' ParmVar 0x{{[0-9A-Fa-f]+}} 'Idx' 'unsigned int'
-// CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit always_inline
 
 // CHECK-NEXT: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &(unsigned int)'
 // CHECK-NEXT: ParmVarDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> Idx 'unsigned int'
@@ -52,7 +51,6 @@ RWBuffer<float> Buffer;
 // CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type *' lvalue .h 0x{{[0-9A-Fa-f]+}}
 // CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'RWBuffer<element_type>' lvalue implicit this
 // CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'unsigned int' ParmVar 0x{{[0-9A-Fa-f]+}} 'Idx' 'unsigned int'
-// CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit always_inline
 
 // CHECK: ClassTemplateSpecializationDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class RWBuffer definition
 

>From bdfb8dc7ee3d6c0ec8fa41df710bfd63fb4a7392 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth at microsoft.com>
Date: Wed, 4 Sep 2024 09:46:21 -0600
Subject: [PATCH 03/10] Respond to feedback

Restore additional alwaysinline setting for RWBuffer subscripts

Add HLSL check to prevent marking autogenerated functions as noinline
---
 clang/lib/CodeGen/CGHLSLRuntime.cpp       | 3 ---
 clang/lib/CodeGen/CodeGenModule.cpp       | 3 ++-
 clang/lib/Sema/HLSLExternalSemaSource.cpp | 2 ++
 clang/test/AST/HLSL/RWBuffer-AST.hlsl     | 2 ++
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 24d126ced0d9f7..c14f29d3cc0f5d 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -424,9 +424,6 @@ void CGHLSLRuntime::setHLSLFunctionAttributes(const FunctionDecl *FD,
     if (!Fn->hasFnAttribute(Attribute::NoInline))
       Fn->addFnAttr(llvm::Attribute::AlwaysInline);
   } else { // "implicit" autogenerated functions with no declaration
-    // Implicit functions might get marked as noinline by default
-    // but we override that for HLSL
-    Fn->removeFnAttr(Attribute::NoInline);
     Fn->addFnAttr(Attribute::AlwaysInline);
   }
 }
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index df4c13c9ad97aa..c2d084f9abde61 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2474,7 +2474,8 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
     // If we don't have a declaration to control inlining, the function isn't
     // explicitly marked as alwaysinline for semantic reasons, and inlining is
     // disabled, mark the function as noinline.
-    if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) &&
+    // HLSL functions must be always inlined
+    if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) && !getLangOpts().HLSL &&
         CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
       B.addAttribute(llvm::Attribute::NoInline);
 
diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp
index 0a534d94192560..9aacbe4ad9548e 100644
--- a/clang/lib/Sema/HLSLExternalSemaSource.cpp
+++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp
@@ -290,6 +290,8 @@ struct BuiltinTypeDeclBuilder {
                                              SourceLocation()));
     MethodDecl->setLexicalDeclContext(Record);
     MethodDecl->setAccess(AccessSpecifier::AS_public);
+    MethodDecl->addAttr(AlwaysInlineAttr::CreateImplicit(
+        AST, SourceRange(), AlwaysInlineAttr::CXX11_clang_always_inline));
     Record->addDecl(MethodDecl);
 
     return *this;
diff --git a/clang/test/AST/HLSL/RWBuffer-AST.hlsl b/clang/test/AST/HLSL/RWBuffer-AST.hlsl
index dd0208d50a05b1..1f6ef60e121ea5 100644
--- a/clang/test/AST/HLSL/RWBuffer-AST.hlsl
+++ b/clang/test/AST/HLSL/RWBuffer-AST.hlsl
@@ -42,6 +42,7 @@ RWBuffer<float> Buffer;
 // CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type *' lvalue .h 0x{{[0-9A-Fa-f]+}}
 // CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'const RWBuffer<element_type>' lvalue implicit this
 // CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'unsigned int' ParmVar 0x{{[0-9A-Fa-f]+}} 'Idx' 'unsigned int'
+// CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit always_inline
 
 // CHECK-NEXT: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &(unsigned int)'
 // CHECK-NEXT: ParmVarDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> Idx 'unsigned int'
@@ -51,6 +52,7 @@ RWBuffer<float> Buffer;
 // CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type *' lvalue .h 0x{{[0-9A-Fa-f]+}}
 // CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'RWBuffer<element_type>' lvalue implicit this
 // CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'unsigned int' ParmVar 0x{{[0-9A-Fa-f]+}} 'Idx' 'unsigned int'
+// CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit always_inline
 
 // CHECK: ClassTemplateSpecializationDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class RWBuffer definition
 

>From fc22a5a9f2f9678393c563de87d8326112e5e0c9 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth at microsoft.com>
Date: Wed, 4 Sep 2024 10:04:54 -0600
Subject: [PATCH 04/10] Default to safer behavior respecting noinline

Whatever the reason a function might have noinline, trying to also set
alwaysinline on it produces an incompatible-attribute error. Checking
for noinline first avoids that in every case.
---
 clang/lib/CodeGen/CGHLSLRuntime.cpp | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index c14f29d3cc0f5d..e542605521e4fa 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -414,18 +414,13 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD,
 
 void CGHLSLRuntime::setHLSLFunctionAttributes(const FunctionDecl *FD,
                                               llvm::Function *Fn) {
-  if (FD) { // "explicit" functions with declarations
-    if (FD->isInExportDeclContext()) {
-      const StringRef ExportAttrKindStr = "hlsl.export";
-      Fn->addFnAttr(ExportAttrKindStr);
-    }
-    // Respect noinline if the explicit functions use it
-    // otherwise default to alwaysinline
-    if (!Fn->hasFnAttribute(Attribute::NoInline))
-      Fn->addFnAttr(llvm::Attribute::AlwaysInline);
-  } else { // "implicit" autogenerated functions with no declaration
-    Fn->addFnAttr(Attribute::AlwaysInline);
+  if (FD && FD->isInExportDeclContext()) {
+    const StringRef ExportAttrKindStr = "hlsl.export";
+    Fn->addFnAttr(ExportAttrKindStr);
   }
+  // Respect noinline if used else default to alwaysinline
+  if (!Fn->hasFnAttribute(Attribute::NoInline))
+    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
 }
 
 static void gatherFunctions(SmallVectorImpl<Function *> &Fns, llvm::Module &M,

>From 5d970dd9327f838c969045b7db7a26932de6fa29 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth at microsoft.com>
Date: Wed, 4 Sep 2024 10:14:51 -0600
Subject: [PATCH 05/10] clang-format

---
 clang/lib/CodeGen/CodeGenModule.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index c2d084f9abde61..e62b127c0de418 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2475,7 +2475,8 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
     // explicitly marked as alwaysinline for semantic reasons, and inlining is
     // disabled, mark the function as noinline.
     // HLSL functions must be always inlined
-    if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) && !getLangOpts().HLSL &&
+    if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) &&
+        !getLangOpts().HLSL &&
         CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
       B.addAttribute(llvm::Attribute::NoInline);
 

>From 7709a26b77dc4d910c53909cd0b159d1714efd3a Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth at microsoft.com>
Date: Sat, 7 Sep 2024 00:15:03 -0600
Subject: [PATCH 06/10] Revise approach entirely

Move all logic into SetLLVMFunctionAttributesForDefinition where
most inlining attribute calculations are done. The way autogenerated
init functions and user functions were created meant that this was hit
at different times for each relative to setHLSLFunctionAttributes.
Instead of trying to resolve that fundamental issue or work around it,
better to keep all the setting in the same place so it doesn't matter
where it is called relative to function body creation.

This required making exceptions for entry functions, as they get
attributes from this code as well and we want them not to be always
inlined and to receive the relevant optimization attributes that this allows.
---
 clang/lib/CodeGen/CGHLSLRuntime.cpp           |  5 +---
 clang/lib/CodeGen/CodeGenFunction.cpp         |  4 +--
 clang/lib/CodeGen/CodeGenModule.cpp           | 14 ++++++----
 .../test/CodeGenHLSL/inline-constructors.hlsl |  4 ++-
 clang/test/CodeGenHLSL/inline-functions.hlsl  | 26 ++++++++++---------
 5 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index e542605521e4fa..4bd7b6ba58de0d 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -414,13 +414,10 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD,
 
 void CGHLSLRuntime::setHLSLFunctionAttributes(const FunctionDecl *FD,
                                               llvm::Function *Fn) {
-  if (FD && FD->isInExportDeclContext()) {
+  if (FD->isInExportDeclContext()) {
     const StringRef ExportAttrKindStr = "hlsl.export";
     Fn->addFnAttr(ExportAttrKindStr);
   }
-  // Respect noinline if used else default to alwaysinline
-  if (!Fn->hasFnAttribute(Attribute::NoInline))
-    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
 }
 
 static void gatherFunctions(SmallVectorImpl<Function *> &Fns, llvm::Module &M,
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index aceeed0e66d130..a5747283e98058 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1239,9 +1239,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
   if (getLangOpts().OpenMP && CurCodeDecl)
     CGM.getOpenMPRuntime().emitFunctionProlog(*this, CurCodeDecl);
 
-  if (getLangOpts().HLSL) {
+  if (FD && getLangOpts().HLSL) {
     // Handle emitting HLSL entry functions.
-    if (FD && FD->hasAttr<HLSLShaderAttr>()) {
+    if (FD->hasAttr<HLSLShaderAttr>()) {
       CGM.getHLSLRuntime().emitEntryFunction(FD, Fn);
     }
     CGM.getHLSLRuntime().setHLSLFunctionAttributes(FD, Fn);
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index e62b127c0de418..f7d7a016a9c540 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2471,12 +2471,13 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
     B.addAttribute(llvm::Attribute::StackProtectReq);
 
   if (!D) {
+    // HLSL functions must always be inlined
+    if (getLangOpts().HLSL && !F->hasFnAttribute("hlsl.shader"))
+      B.addAttribute(llvm::Attribute::AlwaysInline);
     // If we don't have a declaration to control inlining, the function isn't
     // explicitly marked as alwaysinline for semantic reasons, and inlining is
     // disabled, mark the function as noinline.
-    // HLSL functions must be always inlined
-    if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) &&
-        !getLangOpts().HLSL &&
+    else if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) &&
         CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
       B.addAttribute(llvm::Attribute::NoInline);
 
@@ -2504,9 +2505,12 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
   ShouldAddOptNone &= !D->hasAttr<MinSizeAttr>();
   ShouldAddOptNone &= !D->hasAttr<AlwaysInlineAttr>();
 
-  // Add optnone, but do so only if the function isn't always_inline.
-  if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
+  // alwaysinline all HLSL functions save entry points
+  if (getLangOpts().HLSL && !F->hasFnAttribute("hlsl.shader"))
+    B.addAttribute(llvm::Attribute::AlwaysInline);
+  else if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
       !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
+    // Add optnone, but do so only if the function isn't always_inline.
     B.addAttribute(llvm::Attribute::OptimizeNone);
 
     // OptimizeNone implies noinline; we should not be inlining such functions.
diff --git a/clang/test/CodeGenHLSL/inline-constructors.hlsl b/clang/test/CodeGenHLSL/inline-constructors.hlsl
index 40c7a42bdc262e..995878a9c0f798 100644
--- a/clang/test/CodeGenHLSL/inline-constructors.hlsl
+++ b/clang/test/CodeGenHLSL/inline-constructors.hlsl
@@ -2,8 +2,10 @@
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
 
-// Tests that implicit contstructor calls for user classes will always be inlined.
+// Tests that implicit constructor calls for user classes will always be inlined.
 
 struct Weed {
   Weed() {Count += 1;}
diff --git a/clang/test/CodeGenHLSL/inline-functions.hlsl b/clang/test/CodeGenHLSL/inline-functions.hlsl
index 9c00e096a9eef2..1dc157cdbb64ae 100644
--- a/clang/test/CodeGenHLSL/inline-functions.hlsl
+++ b/clang/test/CodeGenHLSL/inline-functions.hlsl
@@ -1,7 +1,9 @@
 // RUN: %clang_cc1 -x hlsl -triple  dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
 // RUN: %clang_cc1 -x hlsl -triple  dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -x hlsl -triple  dxil-pc-shadermodel6.3-library %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
 // RUN: %clang_cc1 -x hlsl -triple  dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
 // RUN: %clang_cc1 -x hlsl -triple  dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -x hlsl -triple  dxil-pc-shadermodel6.0-compute %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
 
 // Tests that user functions will always be inlined.
 // This includes exported functions and mangled entry point implementation functions.
@@ -12,9 +14,9 @@
 float nums[MAX];
 
 // Verify that all functions have the alwaysinline attribute
-// CHECK: Function Attrs: alwaysinline
-// CHECK: define void @"?swap@@YAXY0GE@III@Z"(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[IntAttr:\#[0-9]+]]
-// CHECK: ret void
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define void @"?swap@@YAXY0GE@III@Z"(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[IntAttr:\#[0-9]+]]
+// NOINLINE: ret void
 // Swap the values of Buf at indices ix1 and ix2
 void swap(unsigned Buf[MAX], unsigned ix1, unsigned ix2) {
   float tmp = Buf[ix1];
@@ -22,9 +24,9 @@ void swap(unsigned Buf[MAX], unsigned ix1, unsigned ix2) {
   Buf[ix2] = tmp;
 }
 
-// CHECK: Function Attrs: alwaysinline
-// CHECK: define void @"?BubbleSort@@YAXY0GE@II@Z"(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[IntAttr]]
-// CHECK: ret void
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define void @"?BubbleSort@@YAXY0GE@II@Z"(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[IntAttr]]
+// NOINLINE: ret void
 // Inefficiently sort Buf in place
 void BubbleSort(unsigned Buf[MAX], unsigned size) {
   bool swapped = true;
@@ -41,7 +43,7 @@ void BubbleSort(unsigned Buf[MAX], unsigned size) {
 
 // Note ExtAttr is the inlined export set of attribs
 // CHECK: Function Attrs: alwaysinline
-// CHECK: define noundef i32 @"?RemoveDupes@@YAIY0GE@II@Z"(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[ExtAttr:\#[0-9]+]]
+// CHECK: define noundef i32 @"?RemoveDupes@@YAIY0GE@II@Z"(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) {{[a-z_ ]*}}[[ExtAttr:\#[0-9]+]]
 // CHECK: ret i32
 // Sort Buf and remove any duplicate values
 // returns the number of values left
@@ -69,8 +71,8 @@ RWBuffer<unsigned> Indices;
 // NOINLINE: ret void
 
 // The unmangled version is not inlined, EntryAttr reflects that
-// CHECK: Function Attrs: convergent norecurse
-// CHECK: define void @main() [[EntryAttr:\#[0-9]+]]
+// CHECK-NOT: Function Attrs: alwaysinline
+// CHECK: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
 // Make sure function calls are inlined when AlwaysInline is run
 // This only leaves calls to llvm. intrinsics
 // INLINE-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
@@ -96,8 +98,8 @@ void main(unsigned int GI : SV_GroupIndex) {
 // NOINLINE: ret void
 
 // The unmangled version is not inlined, EntryAttr reflects that
-// CHECK: Function Attrs: convergent norecurse
-// CHECK: define void @main10() [[EntryAttr]]
+// CHECK-NOT: Function Attrs: alwaysinline
+// CHECK: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
 // Make sure function calls are inlined when AlwaysInline is run
 // This only leaves calls to llvm. intrinsics
 // INLINE-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
@@ -109,6 +111,6 @@ void main10() {
   main(10);
 }
 
-// CHECK: attributes [[IntAttr]] = {{.*}} alwaysinline
+// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
 // CHECK: attributes [[ExtAttr]] = {{.*}} alwaysinline
 // CHECK-NOT: attributes [[EntryAttr]] = {{.*}} alwaysinline

>From cc719cc3f7a48c1356d7fdfa5ad6c0b6455aa632 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth at microsoft.com>
Date: Mon, 9 Sep 2024 16:14:20 -0600
Subject: [PATCH 07/10] clang-format again

---
 clang/lib/CodeGen/CodeGenModule.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index f7d7a016a9c540..64f7d281140941 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2478,7 +2478,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
     // explicitly marked as alwaysinline for semantic reasons, and inlining is
     // disabled, mark the function as noinline.
     else if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) &&
-        CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
+             CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
       B.addAttribute(llvm::Attribute::NoInline);
 
     F->addFnAttrs(B);
@@ -2509,7 +2509,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
   if (getLangOpts().HLSL && !F->hasFnAttribute("hlsl.shader"))
     B.addAttribute(llvm::Attribute::AlwaysInline);
   else if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
-      !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
+           !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
     // Add optnone, but do so only if the function isn't always_inline.
     B.addAttribute(llvm::Attribute::OptimizeNone);
 
@@ -2530,7 +2530,8 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
     B.addAttribute(llvm::Attribute::NoInline);
   } else if (D->hasAttr<NoDuplicateAttr>()) {
     B.addAttribute(llvm::Attribute::NoDuplicate);
-  } else if (D->hasAttr<NoInlineAttr>() && !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
+  } else if (D->hasAttr<NoInlineAttr>() &&
+             !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
     // Add noinline if the function isn't always_inline.
     B.addAttribute(llvm::Attribute::NoInline);
   } else if (D->hasAttr<AlwaysInlineAttr>() &&

>From 09c5141233c1f858aca806d14fb44f98943b7533 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth at microsoft.com>
Date: Mon, 9 Sep 2024 18:11:45 -0600
Subject: [PATCH 08/10] reformat comments

---
 clang/lib/CodeGen/CodeGenModule.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 64f7d281140941..5a9275f88eb750 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2471,7 +2471,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
     B.addAttribute(llvm::Attribute::StackProtectReq);
 
   if (!D) {
-    // HLSL functions must always be inlined
+    // Non-entry HLSL functions must always be inlined.
     if (getLangOpts().HLSL && !F->hasFnAttribute("hlsl.shader"))
       B.addAttribute(llvm::Attribute::AlwaysInline);
     // If we don't have a declaration to control inlining, the function isn't
@@ -2505,7 +2505,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
   ShouldAddOptNone &= !D->hasAttr<MinSizeAttr>();
   ShouldAddOptNone &= !D->hasAttr<AlwaysInlineAttr>();
 
-  // alwaysinline all HLSL functions save entry points
+  // Non-entry HLSL functions must always be inlined.
   if (getLangOpts().HLSL && !F->hasFnAttribute("hlsl.shader"))
     B.addAttribute(llvm::Attribute::AlwaysInline);
   else if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&

>From 71a4ea7dd81696d346926e41a2fdb20e75943386 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth at microsoft.com>
Date: Tue, 10 Sep 2024 00:26:21 -0600
Subject: [PATCH 09/10] noinline adjustments: warning, entry function

Set noinline explicitly on the outermost entry function and use that
attribute to decide whether to apply alwaysinline.
Generate a warning for a user-specified noinline and ignore it.

Incidentally tidy up some RUN lines
---
 .../clang/Basic/DiagnosticFrontendKinds.td     |  4 ++++
 clang/lib/CodeGen/CGHLSLRuntime.cpp            |  1 +
 clang/lib/CodeGen/CodeGenModule.cpp            |  9 ++++++---
 clang/test/CodeGenHLSL/inline-functions.hlsl   | 18 +++++++++---------
 4 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
index 8a1462c670d68f..e583da15eb91a5 100644
--- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td
+++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
@@ -378,4 +378,8 @@ def warn_missing_symbol_graph_dir : Warning<
 def err_ast_action_on_llvm_ir : Error<
   "cannot apply AST actions to LLVM IR file '%0'">,
   DefaultFatal;
+
+def warn_unsupported_attribute_ignored : Warning<
+  "ignoring the '%0' attribute because %1 does not support it">,
+  InGroup<IgnoredAttributes>;
 }
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 4bd7b6ba58de0d..b683330358595b 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -337,6 +337,7 @@ void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes(
                 NumThreadsAttr->getZ());
     Fn->addFnAttr(NumThreadsKindStr, NumThreadsStr);
   }
+  Fn->addFnAttr(llvm::Attribute::NoInline);
 }
 
 static Value *buildVectorInput(IRBuilder<> &B, Function *F, llvm::Type *Ty) {
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 5a9275f88eb750..558eb1155827ab 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2472,7 +2472,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
 
   if (!D) {
     // Non-entry HLSL functions must always be inlined.
-    if (getLangOpts().HLSL && !F->hasFnAttribute("hlsl.shader"))
+    if (getLangOpts().HLSL && !F->hasFnAttribute(llvm::Attribute::NoInline))
       B.addAttribute(llvm::Attribute::AlwaysInline);
     // If we don't have a declaration to control inlining, the function isn't
     // explicitly marked as alwaysinline for semantic reasons, and inlining is
@@ -2506,9 +2506,12 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
   ShouldAddOptNone &= !D->hasAttr<AlwaysInlineAttr>();
 
   // Non-entry HLSL functions must always be inlined.
-  if (getLangOpts().HLSL && !F->hasFnAttribute("hlsl.shader"))
+  if (getLangOpts().HLSL && !F->hasFnAttribute(llvm::Attribute::NoInline)) {
+    if (D->hasAttr<NoInlineAttr>())
+      getDiags().Report(D->getLocation(), diag::warn_unsupported_attribute_ignored)
+	<< "noinline" << "HLSL";
     B.addAttribute(llvm::Attribute::AlwaysInline);
-  else if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
+  } else if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
            !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
     // Add optnone, but do so only if the function isn't always_inline.
     B.addAttribute(llvm::Attribute::OptimizeNone);
diff --git a/clang/test/CodeGenHLSL/inline-functions.hlsl b/clang/test/CodeGenHLSL/inline-functions.hlsl
index 1dc157cdbb64ae..7dd905e966e069 100644
--- a/clang/test/CodeGenHLSL/inline-functions.hlsl
+++ b/clang/test/CodeGenHLSL/inline-functions.hlsl
@@ -1,9 +1,9 @@
-// RUN: %clang_cc1 -x hlsl -triple  dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -x hlsl -triple  dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -x hlsl -triple  dxil-pc-shadermodel6.3-library %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -x hlsl -triple  dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -x hlsl -triple  dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -x hlsl -triple  dxil-pc-shadermodel6.0-compute %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
 
 // Tests that user functions will always be inlined.
 // This includes exported functions and mangled entry point implementation functions.
@@ -71,7 +71,7 @@ RWBuffer<unsigned> Indices;
 // NOINLINE: ret void
 
 // The unmangled version is not inlined, EntryAttr reflects that
-// CHECK-NOT: Function Attrs: alwaysinline
+// CHECK: Function Attrs: {{.*}}noinline
 // CHECK: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
 // Make sure function calls are inlined when AlwaysInline is run
 // This only leaves calls to llvm. intrinsics
@@ -98,7 +98,7 @@ void main(unsigned int GI : SV_GroupIndex) {
 // NOINLINE: ret void
 
 // The unmangled version is not inlined, EntryAttr reflects that
-// CHECK-NOT: Function Attrs: alwaysinline
+// CHECK: Function Attrs: {{.*}}noinline
 // CHECK: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
 // Make sure function calls are inlined when AlwaysInline is run
 // This only leaves calls to llvm. intrinsics
@@ -113,4 +113,4 @@ void main10() {
 
 // NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
 // CHECK: attributes [[ExtAttr]] = {{.*}} alwaysinline
-// CHECK-NOT: attributes [[EntryAttr]] = {{.*}} alwaysinline
+// CHECK: attributes [[EntryAttr]] = {{.*}} noinline

>From b68802397344e3b9207dde901b3baa685b4eac54 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth at microsoft.com>
Date: Tue, 10 Sep 2024 20:36:11 -0600
Subject: [PATCH 10/10] clang-format (again)

---
 clang/lib/CodeGen/CodeGenModule.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 558eb1155827ab..5059ff9b013d23 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2508,11 +2508,12 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
   // Non-entry HLSL functions must always be inlined.
   if (getLangOpts().HLSL && !F->hasFnAttribute(llvm::Attribute::NoInline)) {
     if (D->hasAttr<NoInlineAttr>())
-      getDiags().Report(D->getLocation(), diag::warn_unsupported_attribute_ignored)
-	<< "noinline" << "HLSL";
+      getDiags().Report(D->getLocation(),
+                        diag::warn_unsupported_attribute_ignored)
+          << "noinline" << "HLSL";
     B.addAttribute(llvm::Attribute::AlwaysInline);
   } else if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
-           !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
+             !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
     // Add optnone, but do so only if the function isn't always_inline.
     B.addAttribute(llvm::Attribute::OptimizeNone);
 



More information about the cfe-commits mailing list