[clang] [clang][SME] Ignore flatten for callees with mismatched streaming attributes (PR #116391)

Benjamin Maxwell via cfe-commits cfe-commits at lists.llvm.org
Thu Nov 21 04:04:51 PST 2024


https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/116391

>From 90daf9c544bcb776c8a68ad504ba5eda50eafe8a Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 15 Nov 2024 14:35:41 +0000
Subject: [PATCH 1/2] [clang][SME] Ignore flatten for callees mismatched
 streaming attributes

If `__attribute__((flatten))` is used on a function don't inline any
callees with incompatible streaming attributes. Without this check,
clang may produce incorrect code when `flatten` is used in code with
streaming functions.

Note: The docs for flatten say it can be ignored when inlining is
impossible: "causes calls within the attributed function to be inlined
unless it is impossible to do so".
---
 clang/lib/CodeGen/CGCall.cpp                  | 11 ++-
 clang/lib/CodeGen/TargetInfo.h                |  9 +++
 clang/lib/CodeGen/Targets/AArch64.cpp         | 64 +++++++++++++---
 .../AArch64/sme-flatten-streaming-attrs.c     | 74 +++++++++++++++++++
 4 files changed, 143 insertions(+), 15 deletions(-)
 create mode 100644 clang/test/CodeGen/AArch64/sme-flatten-streaming-attrs.c

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 8f4f5d3ed81601..b8a968fdf4e9eb 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5112,9 +5112,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
 
   // Some architectures (such as x86-64) have the ABI changed based on
   // attribute-target/features. Give them a chance to diagnose.
-  CGM.getTargetCodeGenInfo().checkFunctionCallABI(
-      CGM, Loc, dyn_cast_or_null<FunctionDecl>(CurCodeDecl),
-      dyn_cast_or_null<FunctionDecl>(TargetDecl), CallArgs, RetTy);
+  const FunctionDecl *CallerDecl = dyn_cast_or_null<FunctionDecl>(CurCodeDecl);
+  const FunctionDecl *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl);
+  CGM.getTargetCodeGenInfo().checkFunctionCallABI(CGM, Loc, CallerDecl,
+                                                  CalleeDecl, CallArgs, RetTy);
 
   // 1. Set up the arguments.
 
@@ -5705,7 +5706,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
   // FIXME: should this really take priority over __try, below?
   if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() &&
       !InNoInlineAttributedStmt &&
-      !(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>())) {
+      !(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>()) &&
+      !CGM.getTargetCodeGenInfo().wouldInliningViolateFunctionCallABI(
+          CallerDecl, CalleeDecl)) {
     Attrs =
         Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline);
   }
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 373f8b8a80fdb1..23ff476b0e33ce 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -98,6 +98,15 @@ class TargetCodeGenInfo {
                                     const CallArgList &Args,
                                     QualType ReturnType) const {}
 
+  /// Returns true if inlining the function call would produce incorrect code
+  /// for the current target and should be ignored (even with the always_inline
+  /// or flatten attributes).
+  virtual bool
+  wouldInliningViolateFunctionCallABI(const FunctionDecl *Caller,
+                                      const FunctionDecl *Callee) const {
+    return false;
+  }
+
   /// Determines the size of struct _Unwind_Exception on this platform,
   /// in 8-bit units.  The Itanium ABI defines this as:
   ///   struct _Unwind_Exception {
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 9320c6ef06efab..a9ea84b6575f92 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -177,6 +177,9 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
                             const FunctionDecl *Callee, const CallArgList &Args,
                             QualType ReturnType) const override;
 
+  bool wouldInliningViolateFunctionCallABI(
+      const FunctionDecl *Caller, const FunctionDecl *Callee) const override;
+
 private:
   // Diagnose calls between functions with incompatible Streaming SVE
   // attributes.
@@ -1143,12 +1146,20 @@ void AArch64TargetCodeGenInfo::checkFunctionABI(
   }
 }
 
-void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
-    CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
-    const FunctionDecl *Callee) const {
-  if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>())
-    return;
+enum class ArmStreamingInlinability : uint8_t {
+  Ok = 0,
+  IncompatibleStreamingModes = 1,
+  MismatchedStreamingCompatibility = 1 << 1,
+  CalleeHasNewZA = 1 << 2,
+  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/CalleeHasNewZA),
+};
 
+/// Determines if there are any streaming ABI issues with inlining \p Callee
+/// into \p Caller. Returns the issues in the ArmStreamingInlinability bit enum
+/// (multiple bits can be set).
+static ArmStreamingInlinability
+GetArmStreamingInlinability(const FunctionDecl *Caller,
+                            const FunctionDecl *Callee) {
   bool CallerIsStreaming =
       IsArmStreamingFunction(Caller, /*IncludeLocallyStreaming=*/true);
   bool CalleeIsStreaming =
@@ -1156,17 +1167,41 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
   bool CallerIsStreamingCompatible = isStreamingCompatible(Caller);
   bool CalleeIsStreamingCompatible = isStreamingCompatible(Callee);
 
+  ArmStreamingInlinability Inlinability = ArmStreamingInlinability::Ok;
+
   if (!CalleeIsStreamingCompatible &&
-      (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible))
+      (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible)) {
+    Inlinability |= ArmStreamingInlinability::MismatchedStreamingCompatibility;
+    if (CalleeIsStreaming)
+      Inlinability |= ArmStreamingInlinability::IncompatibleStreamingModes;
+  }
+  if (auto *NewAttr = Callee->getAttr<ArmNewAttr>())
+    if (NewAttr->isNewZA())
+      Inlinability |= ArmStreamingInlinability::CalleeHasNewZA;
+
+  return Inlinability;
+}
+
+void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
+    CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
+    const FunctionDecl *Callee) const {
+  if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>())
+    return;
+
+  ArmStreamingInlinability Inlinability =
+      GetArmStreamingInlinability(Caller, Callee);
+
+  if (bool(Inlinability &
+           ArmStreamingInlinability::MismatchedStreamingCompatibility))
     CGM.getDiags().Report(
-        CallLoc, CalleeIsStreaming
+        CallLoc, bool(Inlinability &
+                      ArmStreamingInlinability::IncompatibleStreamingModes)
                      ? diag::err_function_always_inline_attribute_mismatch
                      : diag::warn_function_always_inline_attribute_mismatch)
         << Caller->getDeclName() << Callee->getDeclName() << "streaming";
-  if (auto *NewAttr = Callee->getAttr<ArmNewAttr>())
-    if (NewAttr->isNewZA())
-      CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
-          << Callee->getDeclName();
+  if (bool(Inlinability & ArmStreamingInlinability::CalleeHasNewZA))
+    CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
+        << Callee->getDeclName();
 }
 
 // If the target does not have floating-point registers, but we are using a
@@ -1200,6 +1235,13 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
   checkFunctionCallABISoftFloat(CGM, CallLoc, Caller, Callee, Args, ReturnType);
 }
 
+bool AArch64TargetCodeGenInfo::wouldInliningViolateFunctionCallABI(
+    const FunctionDecl *Caller, const FunctionDecl *Callee) const {
+  return Caller && Callee &&
+         GetArmStreamingInlinability(Caller, Callee) !=
+             ArmStreamingInlinability::Ok;
+}
+
 void AArch64ABIInfo::appendAttributeMangling(TargetClonesAttr *Attr,
                                              unsigned Index,
                                              raw_ostream &Out) const {
diff --git a/clang/test/CodeGen/AArch64/sme-flatten-streaming-attrs.c b/clang/test/CodeGen/AArch64/sme-flatten-streaming-attrs.c
new file mode 100644
index 00000000000000..33ff8f33ca43f3
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sme-flatten-streaming-attrs.c
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme %s -o - | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+extern void was_inlined(void);
+
+#define __flatten  __attribute__((flatten))
+void fn(void) { was_inlined(); }
+void fn_streaming_compatible(void) __arm_streaming_compatible { was_inlined(); }
+void fn_streaming(void) __arm_streaming { was_inlined(); }
+__arm_locally_streaming void fn_locally_streaming(void) { was_inlined(); }
+__arm_new("za") void fn_streaming_new_za(void) __arm_streaming { was_inlined(); }
+
+__flatten
+void caller(void) {
+    fn();
+    fn_streaming_compatible();
+    fn_streaming();
+    fn_locally_streaming();
+    fn_streaming_new_za();
+}
+// CHECK-LABEL: void @caller()
+//  CHECK-NEXT: entry:
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @fn_streaming
+//  CHECK-NEXT:   call void @fn_locally_streaming
+//  CHECK-NEXT:   call void @fn_streaming_new_za
+
+__flatten void caller_streaming_compatible(void) __arm_streaming_compatible {
+    fn();
+    fn_streaming_compatible();
+    fn_streaming();
+    fn_locally_streaming();
+    fn_streaming_new_za();
+}
+// CHECK-LABEL: void @caller_streaming_compatible()
+//  CHECK-NEXT: entry:
+//  CHECK-NEXT:   call void @fn
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @fn_streaming
+//  CHECK-NEXT:   call void @fn_locally_streaming
+//  CHECK-NEXT:   call void @fn_streaming_new_za
+
+__flatten void caller_streaming(void) __arm_streaming {
+    fn();
+    fn_streaming_compatible();
+    fn_streaming();
+    fn_locally_streaming();
+    fn_streaming_new_za();
+}
+// CHECK-LABEL: void @caller_streaming()
+//  CHECK-NEXT: entry:
+//  CHECK-NEXT:   call void @fn
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @fn_streaming_new_za
+
+__flatten __arm_locally_streaming
+void caller_locally_streaming(void) {
+    fn();
+    fn_streaming_compatible();
+    fn_streaming();
+    fn_locally_streaming();
+    fn_streaming_new_za();
+}
+// CHECK-LABEL: void @caller_locally_streaming()
+//  CHECK-NEXT: entry:
+//  CHECK-NEXT:   call void @fn
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @was_inlined
+//  CHECK-NEXT:   call void @fn_streaming_new_za

>From cbbe0397324f4e6dec4b6e6a802be37370462e56 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 21 Nov 2024 12:03:03 +0000
Subject: [PATCH 2/2] Add note to `TargetInfo.h`

---
 clang/lib/CodeGen/TargetInfo.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 23ff476b0e33ce..75d70b0116acd2 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -101,6 +101,15 @@ class TargetCodeGenInfo {
   /// Returns true if inlining the function call would produce incorrect code
   /// for the current target and should be ignored (even with the always_inline
   /// or flatten attributes).
+  ///
+  /// Note: This probably should be handled in LLVM. However, the `alwaysinline`
+  /// attribute currently means the inliner will ignore mismatched attributes
+  /// (which sometimes can generate invalid code). So, this hook allows targets
+  /// to avoid adding the `alwaysinline` attributes based on attributes or other
+  /// target-specific reasons.
+  ///
+  /// See previous discussion here:
+  /// https://discourse.llvm.org/t/rfc-avoid-inlining-alwaysinline-functions-when-they-cannot-be-inlined/79528
   virtual bool
   wouldInliningViolateFunctionCallABI(const FunctionDecl *Caller,
                                       const FunctionDecl *Callee) const {



More information about the cfe-commits mailing list