[clang] [clang][SME] Ignore flatten for callees with mismatched streaming attributes (PR #116391)
Benjamin Maxwell via cfe-commits
cfe-commits at lists.llvm.org
Thu Nov 21 04:04:51 PST 2024
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/116391
>From 90daf9c544bcb776c8a68ad504ba5eda50eafe8a Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 15 Nov 2024 14:35:41 +0000
Subject: [PATCH 1/2] [clang][SME] Ignore flatten for callees mismatched
streaming attributes
If `__attribute__((flatten))` is used on a function don't inline any
callees with incompatible streaming attributes. Without this check,
clang may produce incorrect code when `flatten` is used in code with
streaming functions.
Note: The docs for flatten say it can be ignored when inlining is
impossible: "causes calls within the attributed function to be inlined
unless it is impossible to do so".
---
clang/lib/CodeGen/CGCall.cpp | 11 ++-
clang/lib/CodeGen/TargetInfo.h | 9 +++
clang/lib/CodeGen/Targets/AArch64.cpp | 64 +++++++++++++---
.../AArch64/sme-flatten-streaming-attrs.c | 74 +++++++++++++++++++
4 files changed, 143 insertions(+), 15 deletions(-)
create mode 100644 clang/test/CodeGen/AArch64/sme-flatten-streaming-attrs.c
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 8f4f5d3ed81601..b8a968fdf4e9eb 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5112,9 +5112,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Some architectures (such as x86-64) have the ABI changed based on
// attribute-target/features. Give them a chance to diagnose.
- CGM.getTargetCodeGenInfo().checkFunctionCallABI(
- CGM, Loc, dyn_cast_or_null<FunctionDecl>(CurCodeDecl),
- dyn_cast_or_null<FunctionDecl>(TargetDecl), CallArgs, RetTy);
+ const FunctionDecl *CallerDecl = dyn_cast_or_null<FunctionDecl>(CurCodeDecl);
+ const FunctionDecl *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl);
+ CGM.getTargetCodeGenInfo().checkFunctionCallABI(CGM, Loc, CallerDecl,
+ CalleeDecl, CallArgs, RetTy);
// 1. Set up the arguments.
@@ -5705,7 +5706,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// FIXME: should this really take priority over __try, below?
if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() &&
!InNoInlineAttributedStmt &&
- !(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>())) {
+ !(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>()) &&
+ !CGM.getTargetCodeGenInfo().wouldInliningViolateFunctionCallABI(
+ CallerDecl, CalleeDecl)) {
Attrs =
Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline);
}
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 373f8b8a80fdb1..23ff476b0e33ce 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -98,6 +98,15 @@ class TargetCodeGenInfo {
const CallArgList &Args,
QualType ReturnType) const {}
+ /// Returns true if inlining the function call would produce incorrect code
+ /// for the current target and should be ignored (even with the always_inline
+ /// or flatten attributes).
+ virtual bool
+ wouldInliningViolateFunctionCallABI(const FunctionDecl *Caller,
+ const FunctionDecl *Callee) const {
+ return false;
+ }
+
/// Determines the size of struct _Unwind_Exception on this platform,
/// in 8-bit units. The Itanium ABI defines this as:
/// struct _Unwind_Exception {
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 9320c6ef06efab..a9ea84b6575f92 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -177,6 +177,9 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
const FunctionDecl *Callee, const CallArgList &Args,
QualType ReturnType) const override;
+ bool wouldInliningViolateFunctionCallABI(
+ const FunctionDecl *Caller, const FunctionDecl *Callee) const override;
+
private:
// Diagnose calls between functions with incompatible Streaming SVE
// attributes.
@@ -1143,12 +1146,20 @@ void AArch64TargetCodeGenInfo::checkFunctionABI(
}
}
-void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
- CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
- const FunctionDecl *Callee) const {
- if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>())
- return;
+enum class ArmStreamingInlinability : uint8_t {
+ Ok = 0,
+ IncompatibleStreamingModes = 1,
+ MismatchedStreamingCompatibility = 1 << 1,
+ CalleeHasNewZA = 1 << 2,
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/CalleeHasNewZA),
+};
+/// Determines if there are any streaming ABI issues with inlining \p Callee
+/// into \p Caller. Returns the issues in the ArmStreamingInlinability bit enum
+/// (multiple bits can be set).
+static ArmStreamingInlinability
+GetArmStreamingInlinability(const FunctionDecl *Caller,
+ const FunctionDecl *Callee) {
bool CallerIsStreaming =
IsArmStreamingFunction(Caller, /*IncludeLocallyStreaming=*/true);
bool CalleeIsStreaming =
@@ -1156,17 +1167,41 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
bool CallerIsStreamingCompatible = isStreamingCompatible(Caller);
bool CalleeIsStreamingCompatible = isStreamingCompatible(Callee);
+ ArmStreamingInlinability Inlinability = ArmStreamingInlinability::Ok;
+
if (!CalleeIsStreamingCompatible &&
- (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible))
+ (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible)) {
+ Inlinability |= ArmStreamingInlinability::MismatchedStreamingCompatibility;
+ if (CalleeIsStreaming)
+ Inlinability |= ArmStreamingInlinability::IncompatibleStreamingModes;
+ }
+ if (auto *NewAttr = Callee->getAttr<ArmNewAttr>())
+ if (NewAttr->isNewZA())
+ Inlinability |= ArmStreamingInlinability::CalleeHasNewZA;
+
+ return Inlinability;
+}
+
+void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
+ CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
+ const FunctionDecl *Callee) const {
+ if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>())
+ return;
+
+ ArmStreamingInlinability Inlinability =
+ GetArmStreamingInlinability(Caller, Callee);
+
+ if (bool(Inlinability &
+ ArmStreamingInlinability::MismatchedStreamingCompatibility))
CGM.getDiags().Report(
- CallLoc, CalleeIsStreaming
+ CallLoc, bool(Inlinability &
+ ArmStreamingInlinability::IncompatibleStreamingModes)
? diag::err_function_always_inline_attribute_mismatch
: diag::warn_function_always_inline_attribute_mismatch)
<< Caller->getDeclName() << Callee->getDeclName() << "streaming";
- if (auto *NewAttr = Callee->getAttr<ArmNewAttr>())
- if (NewAttr->isNewZA())
- CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
- << Callee->getDeclName();
+ if (bool(Inlinability & ArmStreamingInlinability::CalleeHasNewZA))
+ CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
+ << Callee->getDeclName();
}
// If the target does not have floating-point registers, but we are using a
@@ -1200,6 +1235,13 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
checkFunctionCallABISoftFloat(CGM, CallLoc, Caller, Callee, Args, ReturnType);
}
+bool AArch64TargetCodeGenInfo::wouldInliningViolateFunctionCallABI(
+ const FunctionDecl *Caller, const FunctionDecl *Callee) const {
+ return Caller && Callee &&
+ GetArmStreamingInlinability(Caller, Callee) !=
+ ArmStreamingInlinability::Ok;
+}
+
void AArch64ABIInfo::appendAttributeMangling(TargetClonesAttr *Attr,
unsigned Index,
raw_ostream &Out) const {
diff --git a/clang/test/CodeGen/AArch64/sme-flatten-streaming-attrs.c b/clang/test/CodeGen/AArch64/sme-flatten-streaming-attrs.c
new file mode 100644
index 00000000000000..33ff8f33ca43f3
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sme-flatten-streaming-attrs.c
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme %s -o - | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+extern void was_inlined(void);
+
+#define __flatten __attribute__((flatten))
+void fn(void) { was_inlined(); }
+void fn_streaming_compatible(void) __arm_streaming_compatible { was_inlined(); }
+void fn_streaming(void) __arm_streaming { was_inlined(); }
+__arm_locally_streaming void fn_locally_streaming(void) { was_inlined(); }
+__arm_new("za") void fn_streaming_new_za(void) __arm_streaming { was_inlined(); }
+
+__flatten
+void caller(void) {
+ fn();
+ fn_streaming_compatible();
+ fn_streaming();
+ fn_locally_streaming();
+ fn_streaming_new_za();
+}
+// CHECK-LABEL: void @caller()
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @was_inlined
+// CHECK-NEXT: call void @was_inlined
+// CHECK-NEXT: call void @fn_streaming
+// CHECK-NEXT: call void @fn_locally_streaming
+// CHECK-NEXT: call void @fn_streaming_new_za
+
+__flatten void caller_streaming_compatible(void) __arm_streaming_compatible {
+ fn();
+ fn_streaming_compatible();
+ fn_streaming();
+ fn_locally_streaming();
+ fn_streaming_new_za();
+}
+// CHECK-LABEL: void @caller_streaming_compatible()
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @fn
+// CHECK-NEXT: call void @was_inlined
+// CHECK-NEXT: call void @fn_streaming
+// CHECK-NEXT: call void @fn_locally_streaming
+// CHECK-NEXT: call void @fn_streaming_new_za
+
+__flatten void caller_streaming(void) __arm_streaming {
+ fn();
+ fn_streaming_compatible();
+ fn_streaming();
+ fn_locally_streaming();
+ fn_streaming_new_za();
+}
+// CHECK-LABEL: void @caller_streaming()
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @fn
+// CHECK-NEXT: call void @was_inlined
+// CHECK-NEXT: call void @was_inlined
+// CHECK-NEXT: call void @was_inlined
+// CHECK-NEXT: call void @fn_streaming_new_za
+
+__flatten __arm_locally_streaming
+void caller_locally_streaming(void) {
+ fn();
+ fn_streaming_compatible();
+ fn_streaming();
+ fn_locally_streaming();
+ fn_streaming_new_za();
+}
+// CHECK-LABEL: void @caller_locally_streaming()
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @fn
+// CHECK-NEXT: call void @was_inlined
+// CHECK-NEXT: call void @was_inlined
+// CHECK-NEXT: call void @was_inlined
+// CHECK-NEXT: call void @fn_streaming_new_za
>From cbbe0397324f4e6dec4b6e6a802be37370462e56 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 21 Nov 2024 12:03:03 +0000
Subject: [PATCH 2/2] Add note to `TargetInfo.h`
---
clang/lib/CodeGen/TargetInfo.h | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 23ff476b0e33ce..75d70b0116acd2 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -101,6 +101,15 @@ class TargetCodeGenInfo {
/// Returns true if inlining the function call would produce incorrect code
/// for the current target and should be ignored (even with the always_inline
/// or flatten attributes).
+ ///
+ /// Note: This probably should be handled in LLVM. However, the `alwaysinline`
+ /// attribute currently means the inliner will ignore mismatched attributes
+ /// (which sometimes can generate invalid code). So, this hook allows targets
+ /// to avoid adding the `alwaysinline` attributes based on attributes or other
+ /// target-specific reasons.
+ ///
+ /// See previous discussion here:
+ /// https://discourse.llvm.org/t/rfc-avoid-inlining-alwaysinline-functions-when-they-cannot-be-inlined/79528
virtual bool
wouldInliningViolateFunctionCallABI(const FunctionDecl *Caller,
const FunctionDecl *Callee) const {
More information about the cfe-commits
mailing list