[clang] [clang][SME] Ignore flatten/clang::always_inline statements for callees with mismatched streaming attributes (PR #116391)

Benjamin Maxwell via cfe-commits cfe-commits at lists.llvm.org
Tue Nov 26 05:57:36 PST 2024


https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/116391

>From 90daf9c544bcb776c8a68ad504ba5eda50eafe8a Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 15 Nov 2024 14:35:41 +0000
Subject: [PATCH 1/6] [clang][SME] Ignore flatten for callees mismatched
 streaming attributes

If `__attribute__((flatten))` is used on a function don't inline any
callees with incompatible streaming attributes. Without this check,
clang may produce incorrect code when `flatten` is used in code with
streaming functions.

Note: The docs for flatten say it can be ignored when inlining is
impossible: "causes calls within the attributed function to be inlined
unless it is impossible to do so".
---
 clang/lib/CodeGen/CGCall.cpp                  | 11 ++-
 clang/lib/CodeGen/TargetInfo.h                |  9 +++
 clang/lib/CodeGen/Targets/AArch64.cpp         | 64 +++++++++++++---
 .../AArch64/sme-flatten-streaming-attrs.c     | 74 +++++++++++++++++++
 4 files changed, 143 insertions(+), 15 deletions(-)
 create mode 100644 clang/test/CodeGen/AArch64/sme-flatten-streaming-attrs.c

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 8f4f5d3ed81601..b8a968fdf4e9eb 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5112,9 +5112,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
 
   // Some architectures (such as x86-64) have the ABI changed based on
   // attribute-target/features. Give them a chance to diagnose.
-  CGM.getTargetCodeGenInfo().checkFunctionCallABI(
-      CGM, Loc, dyn_cast_or_null<FunctionDecl>(CurCodeDecl),
-      dyn_cast_or_null<FunctionDecl>(TargetDecl), CallArgs, RetTy);
+  const FunctionDecl *CallerDecl = dyn_cast_or_null<FunctionDecl>(CurCodeDecl);
+  const FunctionDecl *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl);
+  CGM.getTargetCodeGenInfo().checkFunctionCallABI(CGM, Loc, CallerDecl,
+                                                  CalleeDecl, CallArgs, RetTy);
 
   // 1. Set up the arguments.
 
@@ -5705,7 +5706,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
   // FIXME: should this really take priority over __try, below?
   if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() &&
       !InNoInlineAttributedStmt &&
-      !(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>())) {
+      !(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>()) &&
+      !CGM.getTargetCodeGenInfo().wouldInliningViolateFunctionCallABI(
+          CallerDecl, CalleeDecl)) {
     Attrs =
         Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline);
   }
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 373f8b8a80fdb1..23ff476b0e33ce 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -98,6 +98,15 @@ class TargetCodeGenInfo {
                                     const CallArgList &Args,
                                     QualType ReturnType) const {}
 
+  /// Returns true if inlining the function call would produce incorrect code
+  /// for the current target and should be ignored (even with the always_inline
+  /// or flatten attributes).
+  virtual bool
+  wouldInliningViolateFunctionCallABI(const FunctionDecl *Caller,
+                                      const FunctionDecl *Callee) const {
+    return false;
+  }
+
   /// Determines the size of struct _Unwind_Exception on this platform,
   /// in 8-bit units.  The Itanium ABI defines this as:
   ///   struct _Unwind_Exception {
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 9320c6ef06efab..a9ea84b6575f92 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -177,6 +177,9 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
                             const FunctionDecl *Callee, const CallArgList &Args,
                             QualType ReturnType) const override;
 
+  bool wouldInliningViolateFunctionCallABI(
+      const FunctionDecl *Caller, const FunctionDecl *Callee) const override;
+
 private:
   // Diagnose calls between functions with incompatible Streaming SVE
   // attributes.
@@ -1143,12 +1146,20 @@ void AArch64TargetCodeGenInfo::checkFunctionABI(
   }
 }
 
-void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
-    CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
-    const FunctionDecl *Callee) const {
-  if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>())
-    return;
+enum class ArmStreamingInlinability : uint8_t {
+  Ok = 0,
+  IncompatibleStreamingModes = 1,
+  MismatchedStreamingCompatibility = 1 << 1,
+  CalleeHasNewZA = 1 << 2,
+  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/CalleeHasNewZA),
+};
 
+/// Determines if there are any streaming ABI issues with inlining \p Callee
+/// into \p Caller. Returns the issues in the ArmStreamingInlinability bit enum
+/// (multiple bits can be set).
+static ArmStreamingInlinability
+GetArmStreamingInlinability(const FunctionDecl *Caller,
+                            const FunctionDecl *Callee) {
   bool CallerIsStreaming =
       IsArmStreamingFunction(Caller, /*IncludeLocallyStreaming=*/true);
   bool CalleeIsStreaming =
@@ -1156,17 +1167,41 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
   bool CallerIsStreamingCompatible = isStreamingCompatible(Caller);
   bool CalleeIsStreamingCompatible = isStreamingCompatible(Callee);
 
+  ArmStreamingInlinability Inlinability = ArmStreamingInlinability::Ok;
+
   if (!CalleeIsStreamingCompatible &&
-      (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible))
+      (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible)) {
+    Inlinability |= ArmStreamingInlinability::MismatchedStreamingCompatibility;
+    if (CalleeIsStreaming)
+      Inlinability |= ArmStreamingInlinability::IncompatibleStreamingModes;
+  }
+  if (auto *NewAttr = Callee->getAttr<ArmNewAttr>())
+    if (NewAttr->isNewZA())
+      Inlinability |= ArmStreamingInlinability::CalleeHasNewZA;
+
+  return Inlinability;
+}
+
+void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
+    CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
+    const FunctionDecl *Callee) const {
+  if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>())
+    return;
+
+  ArmStreamingInlinability Inlinability =
+      GetArmStreamingInlinability(Caller, Callee);
+
+  if (bool(Inlinability &
+           ArmStreamingInlinability::MismatchedStreamingCompatibility))
     CGM.getDiags().Report(
-        CallLoc, CalleeIsStreaming
+        CallLoc, bool(Inlinability &
+                      ArmStreamingInlinability::IncompatibleStreamingModes)
                      ? diag::err_function_always_inline_attribute_mismatch
                      : diag::warn_function_always_inline_attribute_mismatch)
         << Caller->getDeclName() << Callee->getDeclName() << "streaming";
-  if (auto *NewAttr = Callee->getAttr<ArmNewAttr>())
-    if (NewAttr->isNewZA())
-      CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
-          << Callee->getDeclName();
+  if (bool(Inlinability & ArmStreamingInlinability::CalleeHasNewZA))
+    CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
+        << Callee->getDeclName();
 }
 
 // If the target does not have floating-point registers, but we are using a
@@ -1200,6 +1235,13 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
   checkFunctionCallABISoftFloat(CGM, CallLoc, Caller, Callee, Args, ReturnType);
 }
 
+bool AArch64TargetCodeGenInfo::wouldInliningViolateFunctionCallABI(
+    const FunctionDecl *Caller, const FunctionDecl *Callee) const {
+  return Caller && Callee &&
+         GetArmStreamingInlinability(Caller, Callee) !=
+             ArmStreamingInlinability::Ok;
+}
+
 void AArch64ABIInfo::appendAttributeMangling(TargetClonesAttr *Attr,
                                              unsigned Index,
                                              raw_ostream &Out) const {
diff --git a/clang/test/CodeGen/AArch64/sme-flatten-streaming-attrs.c b/clang/test/CodeGen/AArch64/sme-flatten-streaming-attrs.c
new file mode 100644
index 00000000000000..33ff8f33ca43f3
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sme-flatten-streaming-attrs.c
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme %s -o - | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+extern void was_inlined(void);
+
+#define __flatten  __attribute__((flatten))
+void fn(void) { was_inlined(); }
+void fn_streaming_compatible(void) __arm_streaming_compatible { was_inlined(); }
+void fn_streaming(void) __arm_streaming { was_inlined(); }
+__arm_locally_streaming void fn_locally_streaming(void) { was_inlined(); }
+__arm_new("za") void fn_streaming_new_za(void) __arm_streaming { was_inlined(); }
+
+__flatten
+void caller(void) {
+    fn();
+    fn_streaming_compatible();
+    fn_streaming();
+    fn_locally_streaming();
+    fn_streaming_new_za();
+}
+// CHECK-LABEL: void @caller()
+//  CHECK-NEXT: entry:
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @fn_streaming
+//  CHECK-NEXT:   call void @fn_locally_streaming
+//  CHECK-NEXT:   call void @fn_streaming_new_za
+
+__flatten void caller_streaming_compatible(void) __arm_streaming_compatible {
+    fn();
+    fn_streaming_compatible();
+    fn_streaming();
+    fn_locally_streaming();
+    fn_streaming_new_za();
+}
+// CHECK-LABEL: void @caller_streaming_compatible()
+//  CHECK-NEXT: entry:
+//  CHECK-NEXT:   call void @fn
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @fn_streaming
+//  CHECK-NEXT:   call void @fn_locally_streaming
+//  CHECK-NEXT:   call void @fn_streaming_new_za
+
+__flatten void caller_streaming(void) __arm_streaming {
+    fn();
+    fn_streaming_compatible();
+    fn_streaming();
+    fn_locally_streaming();
+    fn_streaming_new_za();
+}
+// CHECK-LABEL: void @caller_streaming()
+//  CHECK-NEXT: entry:
+//  CHECK-NEXT:   call void @fn
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @fn_streaming_new_za
+
+__flatten __arm_locally_streaming
+void caller_locally_streaming(void) {
+    fn();
+    fn_streaming_compatible();
+    fn_streaming();
+    fn_locally_streaming();
+    fn_streaming_new_za();
+}
+// CHECK-LABEL: void @caller_locally_streaming()
+//  CHECK-NEXT: entry:
+//  CHECK-NEXT:   call void @fn
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @fn_streaming_new_za

>From cbbe0397324f4e6dec4b6e6a802be37370462e56 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 21 Nov 2024 12:03:03 +0000
Subject: [PATCH 2/6] Add note to `TargetInfo.h`

---
 clang/lib/CodeGen/TargetInfo.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 23ff476b0e33ce..75d70b0116acd2 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -101,6 +101,15 @@ class TargetCodeGenInfo {
   /// Returns true if inlining the function call would produce incorrect code
   /// for the current target and should be ignored (even with the always_inline
   /// or flatten attributes).
+  ///
+  /// Note: This probably should be handled in LLVM. However, the `alwaysinline`
+  /// attribute currently means the inliner will ignore mismatched attributes
+  /// (which sometimes can generate invalid code). So, this hook allows targets
+  /// to avoid adding the `alwaysinline` attributes based on attributes or other
+  /// target-specific reasons.
+  ///
+  /// See previous discussion here:
+  /// https://discourse.llvm.org/t/rfc-avoid-inlining-alwaysinline-functions-when-they-cannot-be-inlined/79528
   virtual bool
   wouldInliningViolateFunctionCallABI(const FunctionDecl *Caller,
                                       const FunctionDecl *Callee) const {

>From fdfc161722ffcdcd5878f6082c11304f4168cfb6 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 21 Nov 2024 14:08:22 +0000
Subject: [PATCH 3/6] Also handle `[[clang::always_inline]]` statement
 attributes

Change-Id: I48ea9dc200cbb60cb1bacd2873cc015f200b0f32
---
 clang/lib/CodeGen/CGCall.cpp                  |  5 +-
 clang/lib/CodeGen/TargetInfo.h                | 10 +--
 .../AArch64/sme-inline-stmt-streaming-attrs.c | 72 +++++++++++++++++++
 3 files changed, 81 insertions(+), 6 deletions(-)
 create mode 100644 clang/test/CodeGen/AArch64/sme-inline-stmt-streaming-attrs.c

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index b8a968fdf4e9eb..4b2cf7f4de3610 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5690,7 +5690,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
     Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoInline);
 
   // Add call-site always_inline attribute if exists.
-  if (InAlwaysInlineAttributedStmt)
+  // Note: This corresponds to the [[clang::always_inline]] statement attribute.
+  if (InAlwaysInlineAttributedStmt &&
+      !CGM.getTargetCodeGenInfo().wouldInliningViolateFunctionCallABI(
+          CallerDecl, CalleeDecl))
     Attrs =
         Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline);
 
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 75d70b0116acd2..ab3142bdea684e 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -102,11 +102,11 @@ class TargetCodeGenInfo {
   /// for the current target and should be ignored (even with the always_inline
   /// or flatten attributes).
   ///
-  /// Note: This probably should be handled in LLVM. However, the `alwaysinline`
-  /// attribute currently means the inliner will ignore mismatched attributes
-  /// (which sometimes can generate invalid code). So, this hook allows targets
-  /// to avoid adding the `alwaysinline` attributes based on attributes or other
-  /// target-specific reasons.
+  /// Note: This probably should be handled in LLVM. However, the LLVM
+  /// `alwaysinline` attribute currently means the inliner will ignore
+  /// mismatched attributes (which sometimes can generate invalid code). So,
+  /// this hook allows targets to avoid adding the LLVM `alwaysinline` attribute
+  /// based on C/C++ attributes or other target-specific reasons.
   ///
   /// See previous discussion here:
   /// https://discourse.llvm.org/t/rfc-avoid-inlining-alwaysinline-functions-when-they-cannot-be-inlined/79528
diff --git a/clang/test/CodeGen/AArch64/sme-inline-stmt-streaming-attrs.c b/clang/test/CodeGen/AArch64/sme-inline-stmt-streaming-attrs.c
new file mode 100644
index 00000000000000..c30ab6463847d3
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sme-inline-stmt-streaming-attrs.c
@@ -0,0 +1,72 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme %s -o - | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+extern void was_inlined(void);
+
+void fn(void) { was_inlined(); }
+void fn_streaming_compatible(void) __arm_streaming_compatible { was_inlined(); }
+void fn_streaming(void) __arm_streaming { was_inlined(); }
+__arm_locally_streaming void fn_locally_streaming(void) { was_inlined(); }
+__arm_new("za") void fn_streaming_new_za(void) __arm_streaming { was_inlined(); }
+
+void caller(void) {
+    [[clang::always_inline]] fn();
+    [[clang::always_inline]] fn_streaming_compatible();
+    [[clang::always_inline]] fn_streaming();
+    [[clang::always_inline]] fn_locally_streaming();
+    [[clang::always_inline]] fn_streaming_new_za();
+}
+// CHECK-LABEL: void @caller()
+//  CHECK-NEXT: entry:
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @fn_streaming
+//  CHECK-NEXT:   call void @fn_locally_streaming
+//  CHECK-NEXT:   call void @fn_streaming_new_za
+
+void caller_streaming_compatible(void) __arm_streaming_compatible {
+    [[clang::always_inline]] fn();
+    [[clang::always_inline]] fn_streaming_compatible();
+    [[clang::always_inline]] fn_streaming();
+    [[clang::always_inline]] fn_locally_streaming();
+    [[clang::always_inline]] fn_streaming_new_za();
+}
+// CHECK-LABEL: void @caller_streaming_compatible()
+//  CHECK-NEXT: entry:
+//  CHECK-NEXT:   call void @fn
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @fn_streaming
+//  CHECK-NEXT:   call void @fn_locally_streaming
+//  CHECK-NEXT:   call void @fn_streaming_new_za
+
+void caller_streaming(void) __arm_streaming {
+    [[clang::always_inline]] fn();
+    [[clang::always_inline]] fn_streaming_compatible();
+    [[clang::always_inline]] fn_streaming();
+    [[clang::always_inline]] fn_locally_streaming();
+    [[clang::always_inline]] fn_streaming_new_za();
+}
+// CHECK-LABEL: void @caller_streaming()
+//  CHECK-NEXT: entry:
+//  CHECK-NEXT:   call void @fn
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @fn_streaming_new_za
+
+__arm_locally_streaming
+void caller_locally_streaming(void) {
+    [[clang::always_inline]] fn();
+    [[clang::always_inline]] fn_streaming_compatible();
+    [[clang::always_inline]] fn_streaming();
+    [[clang::always_inline]] fn_locally_streaming();
+    [[clang::always_inline]] fn_streaming_new_za();
+}
+// CHECK-LABEL: void @caller_locally_streaming()
+//  CHECK-NEXT: entry:
+//  CHECK-NEXT:   call void @fn
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @fn_streaming_new_za

>From a1c59ac42daee22e73c71bb85be664cc00155583 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 21 Nov 2024 15:05:25 +0000
Subject: [PATCH 4/6] Fixups

---
 clang/lib/CodeGen/Targets/AArch64.cpp         | 48 ++++++-------
 ...c => sme-inline-callees-streaming-attrs.c} | 62 +++++++++-------
 .../AArch64/sme-inline-stmt-streaming-attrs.c | 72 -------------------
 3 files changed, 59 insertions(+), 123 deletions(-)
 rename clang/test/CodeGen/AArch64/{sme-flatten-streaming-attrs.c => sme-inline-callees-streaming-attrs.c} (55%)
 delete mode 100644 clang/test/CodeGen/AArch64/sme-inline-stmt-streaming-attrs.c

diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index a9ea84b6575f92..a13313c6e00290 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -1146,20 +1146,19 @@ void AArch64TargetCodeGenInfo::checkFunctionABI(
   }
 }
 
-enum class ArmStreamingInlinability : uint8_t {
+enum ArmSMEInlinability : uint8_t {
   Ok = 0,
-  IncompatibleStreamingModes = 1,
   MismatchedStreamingCompatibility = 1 << 1,
-  CalleeHasNewZA = 1 << 2,
-  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/CalleeHasNewZA),
+  IncompatibleStreamingModes = 1,
+  CalleeRequiresNewZA = 1 << 2,
+  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/CalleeRequiresNewZA),
 };
 
-/// Determines if there are any streaming ABI issues with inlining \p Callee
-/// into \p Caller. Returns the issues in the ArmStreamingInlinability bit enum
-/// (multiple bits can be set).
-static ArmStreamingInlinability
-GetArmStreamingInlinability(const FunctionDecl *Caller,
-                            const FunctionDecl *Callee) {
+/// Determines if there are any Arm SME ABI issues with inlining \p Callee into
+/// \p Caller. Returns the issues in the ArmSMEInlinability bit enum (multiple
+/// bits can be set).
+static ArmSMEInlinability GetArmSMEInlinability(const FunctionDecl *Caller,
+                                                const FunctionDecl *Callee) {
   bool CallerIsStreaming =
       IsArmStreamingFunction(Caller, /*IncludeLocallyStreaming=*/true);
   bool CalleeIsStreaming =
@@ -1167,17 +1166,17 @@ GetArmStreamingInlinability(const FunctionDecl *Caller,
   bool CallerIsStreamingCompatible = isStreamingCompatible(Caller);
   bool CalleeIsStreamingCompatible = isStreamingCompatible(Callee);
 
-  ArmStreamingInlinability Inlinability = ArmStreamingInlinability::Ok;
+  ArmSMEInlinability Inlinability = ArmSMEInlinability::Ok;
 
   if (!CalleeIsStreamingCompatible &&
       (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible)) {
-    Inlinability |= ArmStreamingInlinability::MismatchedStreamingCompatibility;
+    Inlinability |= ArmSMEInlinability::MismatchedStreamingCompatibility;
     if (CalleeIsStreaming)
-      Inlinability |= ArmStreamingInlinability::IncompatibleStreamingModes;
+      Inlinability |= ArmSMEInlinability::IncompatibleStreamingModes;
   }
   if (auto *NewAttr = Callee->getAttr<ArmNewAttr>())
     if (NewAttr->isNewZA())
-      Inlinability |= ArmStreamingInlinability::CalleeHasNewZA;
+      Inlinability |= ArmSMEInlinability::CalleeRequiresNewZA;
 
   return Inlinability;
 }
@@ -1188,18 +1187,18 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
   if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>())
     return;
 
-  ArmStreamingInlinability Inlinability =
-      GetArmStreamingInlinability(Caller, Callee);
+  ArmSMEInlinability Inlinability = GetArmSMEInlinability(Caller, Callee);
 
-  if (bool(Inlinability &
-           ArmStreamingInlinability::MismatchedStreamingCompatibility))
+  if ((Inlinability & ArmSMEInlinability::MismatchedStreamingCompatibility) !=
+      0)
     CGM.getDiags().Report(
-        CallLoc, bool(Inlinability &
-                      ArmStreamingInlinability::IncompatibleStreamingModes)
-                     ? diag::err_function_always_inline_attribute_mismatch
-                     : diag::warn_function_always_inline_attribute_mismatch)
+        CallLoc,
+        (Inlinability & ArmSMEInlinability::IncompatibleStreamingModes) != 0
+            ? diag::err_function_always_inline_attribute_mismatch
+            : diag::warn_function_always_inline_attribute_mismatch)
         << Caller->getDeclName() << Callee->getDeclName() << "streaming";
-  if (bool(Inlinability & ArmStreamingInlinability::CalleeHasNewZA))
+
+  if ((Inlinability & ArmSMEInlinability::CalleeRequiresNewZA) != 0)
     CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
         << Callee->getDeclName();
 }
@@ -1238,8 +1237,7 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
 bool AArch64TargetCodeGenInfo::wouldInliningViolateFunctionCallABI(
     const FunctionDecl *Caller, const FunctionDecl *Callee) const {
   return Caller && Callee &&
-         GetArmStreamingInlinability(Caller, Callee) !=
-             ArmStreamingInlinability::Ok;
+         GetArmSMEInlinability(Caller, Callee) != ArmSMEInlinability::Ok;
 }
 
 void AArch64ABIInfo::appendAttributeMangling(TargetClonesAttr *Attr,
diff --git a/clang/test/CodeGen/AArch64/sme-flatten-streaming-attrs.c b/clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c
similarity index 55%
rename from clang/test/CodeGen/AArch64/sme-flatten-streaming-attrs.c
rename to clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c
index 33ff8f33ca43f3..ce6f203631fc5c 100644
--- a/clang/test/CodeGen/AArch64/sme-flatten-streaming-attrs.c
+++ b/clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c
@@ -1,23 +1,33 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme %s -DUSE_FLATTEN -o - | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme %s -DUSE_ALWAYS_INLINE_STMT -o - | FileCheck %s
 
 // REQUIRES: aarch64-registered-target
 
 extern void was_inlined(void);
 
-#define __flatten  __attribute__((flatten))
+#if defined(USE_FLATTEN)
+    #define FN_ATTR __attribute__((flatten))
+    #define STMT_ATTR
+#elif defined(USE_ALWAYS_INLINE_STMT)
+    #define FN_ATTR
+    #define STMT_ATTR [[clang::always_inline]]
+#else
+    #error Expected USE_FLATTEN or USE_ALWAYS_INLINE_STMT to be defined.
+#endif
+
 void fn(void) { was_inlined(); }
 void fn_streaming_compatible(void) __arm_streaming_compatible { was_inlined(); }
 void fn_streaming(void) __arm_streaming { was_inlined(); }
 __arm_locally_streaming void fn_locally_streaming(void) { was_inlined(); }
 __arm_new("za") void fn_streaming_new_za(void) __arm_streaming { was_inlined(); }
 
-__flatten
+FN_ATTR
 void caller(void) {
-    fn();
-    fn_streaming_compatible();
-    fn_streaming();
-    fn_locally_streaming();
-    fn_streaming_new_za();
+    STMT_ATTR fn();
+    STMT_ATTR fn_streaming_compatible();
+    STMT_ATTR fn_streaming();
+    STMT_ATTR fn_locally_streaming();
+    STMT_ATTR fn_streaming_new_za();
 }
 // CHECK-LABEL: void @caller()
 //  CHECK-NEXT: entry:
@@ -27,12 +37,12 @@ void caller(void) {
 //  CHECK-NEXT:   call void @fn_locally_streaming
 //  CHECK-NEXT:   call void @fn_streaming_new_za
 
-__flatten void caller_streaming_compatible(void) __arm_streaming_compatible {
-    fn();
-    fn_streaming_compatible();
-    fn_streaming();
-    fn_locally_streaming();
-    fn_streaming_new_za();
+FN_ATTR void caller_streaming_compatible(void) __arm_streaming_compatible {
+    STMT_ATTR fn();
+    STMT_ATTR fn_streaming_compatible();
+    STMT_ATTR fn_streaming();
+    STMT_ATTR fn_locally_streaming();
+    STMT_ATTR fn_streaming_new_za();
 }
 // CHECK-LABEL: void @caller_streaming_compatible()
 //  CHECK-NEXT: entry:
@@ -42,12 +52,12 @@ __flatten void caller_streaming_compatible(void) __arm_streaming_compatible {
 //  CHECK-NEXT:   call void @fn_locally_streaming
 //  CHECK-NEXT:   call void @fn_streaming_new_za
 
-__flatten void caller_streaming(void) __arm_streaming {
-    fn();
-    fn_streaming_compatible();
-    fn_streaming();
-    fn_locally_streaming();
-    fn_streaming_new_za();
+FN_ATTR void caller_streaming(void) __arm_streaming {
+    STMT_ATTR fn();
+    STMT_ATTR fn_streaming_compatible();
+    STMT_ATTR fn_streaming();
+    STMT_ATTR fn_locally_streaming();
+    STMT_ATTR fn_streaming_new_za();
 }
 // CHECK-LABEL: void @caller_streaming()
 //  CHECK-NEXT: entry:
@@ -57,13 +67,13 @@ __flatten void caller_streaming(void) __arm_streaming {
 //  CHECK-NEXT:   call void @was_inlined
 //  CHECK-NEXT:   call void @fn_streaming_new_za
 
-__flatten __arm_locally_streaming
+FN_ATTR __arm_locally_streaming
 void caller_locally_streaming(void) {
-    fn();
-    fn_streaming_compatible();
-    fn_streaming();
-    fn_locally_streaming();
-    fn_streaming_new_za();
+    STMT_ATTR fn();
+    STMT_ATTR fn_streaming_compatible();
+    STMT_ATTR fn_streaming();
+    STMT_ATTR fn_locally_streaming();
+    STMT_ATTR fn_streaming_new_za();
 }
 // CHECK-LABEL: void @caller_locally_streaming()
 //  CHECK-NEXT: entry:
diff --git a/clang/test/CodeGen/AArch64/sme-inline-stmt-streaming-attrs.c b/clang/test/CodeGen/AArch64/sme-inline-stmt-streaming-attrs.c
deleted file mode 100644
index c30ab6463847d3..00000000000000
--- a/clang/test/CodeGen/AArch64/sme-inline-stmt-streaming-attrs.c
+++ /dev/null
@@ -1,72 +0,0 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme %s -o - | FileCheck %s
-
-// REQUIRES: aarch64-registered-target
-
-extern void was_inlined(void);
-
-void fn(void) { was_inlined(); }
-void fn_streaming_compatible(void) __arm_streaming_compatible { was_inlined(); }
-void fn_streaming(void) __arm_streaming { was_inlined(); }
-__arm_locally_streaming void fn_locally_streaming(void) { was_inlined(); }
-__arm_new("za") void fn_streaming_new_za(void) __arm_streaming { was_inlined(); }
-
-void caller(void) {
-    [[clang::always_inline]] fn();
-    [[clang::always_inline]] fn_streaming_compatible();
-    [[clang::always_inline]] fn_streaming();
-    [[clang::always_inline]] fn_locally_streaming();
-    [[clang::always_inline]] fn_streaming_new_za();
-}
-// CHECK-LABEL: void @caller()
-//  CHECK-NEXT: entry:
-//  CHECK-NEXT:   call void @was_inlined
-//  CHECK-NEXT:   call void @was_inlined
-//  CHECK-NEXT:   call void @fn_streaming
-//  CHECK-NEXT:   call void @fn_locally_streaming
-//  CHECK-NEXT:   call void @fn_streaming_new_za
-
-void caller_streaming_compatible(void) __arm_streaming_compatible {
-    [[clang::always_inline]] fn();
-    [[clang::always_inline]] fn_streaming_compatible();
-    [[clang::always_inline]] fn_streaming();
-    [[clang::always_inline]] fn_locally_streaming();
-    [[clang::always_inline]] fn_streaming_new_za();
-}
-// CHECK-LABEL: void @caller_streaming_compatible()
-//  CHECK-NEXT: entry:
-//  CHECK-NEXT:   call void @fn
-//  CHECK-NEXT:   call void @was_inlined
-//  CHECK-NEXT:   call void @fn_streaming
-//  CHECK-NEXT:   call void @fn_locally_streaming
-//  CHECK-NEXT:   call void @fn_streaming_new_za
-
-void caller_streaming(void) __arm_streaming {
-    [[clang::always_inline]] fn();
-    [[clang::always_inline]] fn_streaming_compatible();
-    [[clang::always_inline]] fn_streaming();
-    [[clang::always_inline]] fn_locally_streaming();
-    [[clang::always_inline]] fn_streaming_new_za();
-}
-// CHECK-LABEL: void @caller_streaming()
-//  CHECK-NEXT: entry:
-//  CHECK-NEXT:   call void @fn
-//  CHECK-NEXT:   call void @was_inlined
-//  CHECK-NEXT:   call void @was_inlined
-//  CHECK-NEXT:   call void @was_inlined
-//  CHECK-NEXT:   call void @fn_streaming_new_za
-
-__arm_locally_streaming
-void caller_locally_streaming(void) {
-    [[clang::always_inline]] fn();
-    [[clang::always_inline]] fn_streaming_compatible();
-    [[clang::always_inline]] fn_streaming();
-    [[clang::always_inline]] fn_locally_streaming();
-    [[clang::always_inline]] fn_streaming_new_za();
-}
-// CHECK-LABEL: void @caller_locally_streaming()
-//  CHECK-NEXT: entry:
-//  CHECK-NEXT:   call void @fn
-//  CHECK-NEXT:   call void @was_inlined
-//  CHECK-NEXT:   call void @was_inlined
-//  CHECK-NEXT:   call void @was_inlined
-//  CHECK-NEXT:   call void @fn_streaming_new_za

>From a3e10e332c24250010a79973b01590d729d471ba Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 26 Nov 2024 11:28:40 +0000
Subject: [PATCH 5/6] Fixups

---
 clang/lib/CodeGen/Targets/AArch64.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index a13313c6e00290..5561df8e2dbf2b 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -1146,10 +1146,10 @@ void AArch64TargetCodeGenInfo::checkFunctionABI(
   }
 }
 
-enum ArmSMEInlinability : uint8_t {
+enum class ArmSMEInlinability : uint8_t {
   Ok = 0,
-  MismatchedStreamingCompatibility = 1 << 1,
-  IncompatibleStreamingModes = 1,
+  MismatchedStreamingCompatibility = 1 << 0,
+  IncompatibleStreamingModes = 1 << 1,
   CalleeRequiresNewZA = 1 << 2,
   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/CalleeRequiresNewZA),
 };
@@ -1190,15 +1190,17 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
   ArmSMEInlinability Inlinability = GetArmSMEInlinability(Caller, Callee);
 
   if ((Inlinability & ArmSMEInlinability::MismatchedStreamingCompatibility) !=
-      0)
+      ArmSMEInlinability::Ok)
     CGM.getDiags().Report(
         CallLoc,
-        (Inlinability & ArmSMEInlinability::IncompatibleStreamingModes) != 0
+        (Inlinability & ArmSMEInlinability::IncompatibleStreamingModes) !=
+                ArmSMEInlinability::Ok
             ? diag::err_function_always_inline_attribute_mismatch
             : diag::warn_function_always_inline_attribute_mismatch)
         << Caller->getDeclName() << Callee->getDeclName() << "streaming";
 
-  if ((Inlinability & ArmSMEInlinability::CalleeRequiresNewZA) != 0)
+  if ((Inlinability & ArmSMEInlinability::CalleeRequiresNewZA) !=
+      ArmSMEInlinability::Ok)
     CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
         << Callee->getDeclName();
 }

>From 9b439cd0678519e83006cd02eb8b46875c8bee92 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 26 Nov 2024 13:56:23 +0000
Subject: [PATCH 6/6] Tweak enum

---
 clang/lib/CodeGen/Targets/AArch64.cpp | 32 +++++++++++++++------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 5561df8e2dbf2b..be33e26f047841 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -1148,15 +1148,18 @@ void AArch64TargetCodeGenInfo::checkFunctionABI(
 
 enum class ArmSMEInlinability : uint8_t {
   Ok = 0,
-  MismatchedStreamingCompatibility = 1 << 0,
-  IncompatibleStreamingModes = 1 << 1,
-  CalleeRequiresNewZA = 1 << 2,
-  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/CalleeRequiresNewZA),
+  ErrorCalleeRequiresNewZA = 1 << 0,
+  WarnIncompatibleStreamingModes = 1 << 1,
+  ErrorIncompatibleStreamingModes = 1 << 2,
+
+  IncompatibleStreamingModes =
+      WarnIncompatibleStreamingModes | ErrorIncompatibleStreamingModes,
+
+  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/ErrorIncompatibleStreamingModes),
 };
 
 /// Determines if there are any Arm SME ABI issues with inlining \p Callee into
-/// \p Caller. Returns the issues in the ArmSMEInlinability bit enum (multiple
-/// bits can be set).
+/// \p Caller. Returns the issue (if any) in the ArmSMEInlinability bit enum.
 static ArmSMEInlinability GetArmSMEInlinability(const FunctionDecl *Caller,
                                                 const FunctionDecl *Callee) {
   bool CallerIsStreaming =
@@ -1170,13 +1173,14 @@ static ArmSMEInlinability GetArmSMEInlinability(const FunctionDecl *Caller,
 
   if (!CalleeIsStreamingCompatible &&
       (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible)) {
-    Inlinability |= ArmSMEInlinability::MismatchedStreamingCompatibility;
     if (CalleeIsStreaming)
-      Inlinability |= ArmSMEInlinability::IncompatibleStreamingModes;
+      Inlinability |= ArmSMEInlinability::ErrorIncompatibleStreamingModes;
+    else
+      Inlinability |= ArmSMEInlinability::WarnIncompatibleStreamingModes;
   }
   if (auto *NewAttr = Callee->getAttr<ArmNewAttr>())
     if (NewAttr->isNewZA())
-      Inlinability |= ArmSMEInlinability::CalleeRequiresNewZA;
+      Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZA;
 
   return Inlinability;
 }
@@ -1189,18 +1193,18 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
 
   ArmSMEInlinability Inlinability = GetArmSMEInlinability(Caller, Callee);
 
-  if ((Inlinability & ArmSMEInlinability::MismatchedStreamingCompatibility) !=
+  if ((Inlinability & ArmSMEInlinability::IncompatibleStreamingModes) !=
       ArmSMEInlinability::Ok)
     CGM.getDiags().Report(
         CallLoc,
-        (Inlinability & ArmSMEInlinability::IncompatibleStreamingModes) !=
-                ArmSMEInlinability::Ok
+        (Inlinability & ArmSMEInlinability::ErrorIncompatibleStreamingModes) ==
+                ArmSMEInlinability::ErrorIncompatibleStreamingModes
             ? diag::err_function_always_inline_attribute_mismatch
             : diag::warn_function_always_inline_attribute_mismatch)
         << Caller->getDeclName() << Callee->getDeclName() << "streaming";
 
-  if ((Inlinability & ArmSMEInlinability::CalleeRequiresNewZA) !=
-      ArmSMEInlinability::Ok)
+  if ((Inlinability & ArmSMEInlinability::ErrorCalleeRequiresNewZA) ==
+      ArmSMEInlinability::ErrorCalleeRequiresNewZA)
     CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
         << Callee->getDeclName();
 }



More information about the cfe-commits mailing list