[llvm] 64bef3d - [AArch64][SME] Disable inlining when SME attributes require smstart/smstop or lazy-save.

David Sherwood via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 21 01:35:56 PDT 2022


Author: David Sherwood
Date: 2022-09-21T09:35:47+01:00
New Revision: 64bef3d5688b36a852d7aa3ee2276d4a9856af9a

URL: https://github.com/llvm/llvm-project/commit/64bef3d5688b36a852d7aa3ee2276d4a9856af9a
DIFF: https://github.com/llvm/llvm-project/commit/64bef3d5688b36a852d7aa3ee2276d4a9856af9a.diff

LOG: [AArch64][SME] Disable inlining when SME attributes require smstart/smstop or lazy-save.

Inlining must be disabled when the call-site needs to toggle PSTATE.SM or
when the callee's function body is executed in a different streaming mode than
its caller. This is needed because function calls are the boundaries for
streaming mode changes.

More details about the SME attributes and design can be found
in D131562.

Differential Revision: https://reviews.llvm.org/D131581

Added: 
    llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
    llvm/test/Transforms/Inline/AArch64/sme-pstateza-attrs.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 00cb4f3784fc5..d1dcbdfc9cf00 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -108,6 +108,14 @@ cl::opt<TailFoldingKind, true, cl::parser<std::string>> SVETailFolding(
 
 bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
                                          const Function *Callee) const {
+  SMEAttrs CallerAttrs(*Caller);
+  SMEAttrs CalleeAttrs(*Callee);
+  if (CallerAttrs.requiresSMChange(CalleeAttrs,
+                                   /*BodyOverridesInterface=*/true) ||
+      CallerAttrs.requiresLazySave(CalleeAttrs) ||
+      CalleeAttrs.hasNewZAInterface())
+    return false;
+
   const TargetMachine &TM = getTLI()->getTargetMachine();
 
   const FeatureBitset &CallerBits =

diff  --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
new file mode 100644
index 0000000000000..edf36d6e36d82
--- /dev/null
+++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
@@ -0,0 +1,377 @@
+; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -inline | FileCheck %s
+
+declare void @inlined_body() "aarch64_pstate_sm_compatible";
+
+;
+; Define some functions that will be called by the functions below.
+; These just call a '...body()' function. If we see the call to one of
+; these functions being replaced by '...body()', then we know it has been
+; inlined.
+;
+
+define void @normal_callee() {
+entry:
+  call void @inlined_body()
+  ret void
+}
+
+define void @streaming_callee() "aarch64_pstate_sm_enabled" {
+entry:
+  call void @inlined_body()
+  ret void
+}
+
+define void @locally_streaming_callee() "aarch64_pstate_sm_body" {
+entry:
+  call void @inlined_body()
+  ret void
+}
+
+define void @streaming_compatible_callee() "aarch64_pstate_sm_compatible" {
+entry:
+  call void @inlined_body()
+  ret void
+}
+
+define void @streaming_compatible_locally_streaming_callee() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+entry:
+  call void @inlined_body()
+  ret void
+}
+
+;
+; Now test that inlining only happens when the caller's and callee's streaming modes match.
+; Test for a number of combinations, where:
+; N       Normal-interface (PSTATE.SM=0 on entry/exit)
+; S       Streaming-interface (PSTATE.SM=1 on entry/exit)
+; SC      Streaming-compatible interface
+;         (PSTATE.SM=0 or 1, unchanged on exit)
+; N + B   Normal-interface, streaming body
+;         (PSTATE.SM=0 on entry/exit, but 1 within the body of the function)
+; SC + B  Streaming-compatible-interface, streaming body
+;         (PSTATE.SM=0 or 1 on entry, unchanged on exit,
+;          but guaranteed to be 1 within the body of the function)
+
+; [x] N  -> N
+; [ ] N  -> S
+; [ ] N  -> SC
+; [ ] N  -> N + B
+; [ ] N  -> SC + B
+define void @normal_caller_normal_callee_inline() {
+; CHECK-LABEL: @normal_caller_normal_callee_inline(
+; CHECK: call void @inlined_body()
+entry:
+  call void @normal_callee()
+  ret void
+}
+
+; [ ] N  -> N
+; [x] N  -> S
+; [ ] N  -> SC
+; [ ] N  -> N + B
+; [ ] N  -> SC + B
+define void @normal_caller_streaming_callee_dont_inline() {
+; CHECK-LABEL: @normal_caller_streaming_callee_dont_inline(
+; CHECK: call void @streaming_callee()
+entry:
+  call void @streaming_callee()
+  ret void
+}
+
+; [ ] N  -> N
+; [ ] N  -> S
+; [x] N  -> SC
+; [ ] N  -> N + B
+; [ ] N  -> SC + B
+define void @normal_caller_streaming_compatible_callee_inline() {
+; CHECK-LABEL: @normal_caller_streaming_compatible_callee_inline(
+; CHECK: call void @inlined_body()
+entry:
+  call void @streaming_compatible_callee()
+  ret void
+}
+
+; [ ] N  -> N
+; [ ] N  -> S
+; [ ] N  -> SC
+; [x] N  -> N + B
+; [ ] N  -> SC + B
+define void @normal_caller_locally_streaming_callee_dont_inline() {
+; CHECK-LABEL: @normal_caller_locally_streaming_callee_dont_inline(
+; CHECK: call void @locally_streaming_callee()
+entry:
+  call void @locally_streaming_callee()
+  ret void
+}
+
+; [ ] N  -> N
+; [ ] N  -> S
+; [ ] N  -> SC
+; [ ] N  -> N + B
+; [x] N  -> SC + B
+define void @normal_caller_streaming_compatible_locally_streaming_callee_dont_inline() {
+; CHECK-LABEL: @normal_caller_streaming_compatible_locally_streaming_callee_dont_inline(
+; CHECK: call void @streaming_compatible_locally_streaming_callee()
+entry:
+  call void @streaming_compatible_locally_streaming_callee()
+  ret void
+}
+
+; [x] S  -> N
+; [ ] S  -> S
+; [ ] S  -> SC
+; [ ] S  -> N + B
+; [ ] S  -> SC + B
+define void @streaming_caller_normal_callee_dont_inline() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: @streaming_caller_normal_callee_dont_inline(
+; CHECK: call void @normal_callee()
+entry:
+  call void @normal_callee()
+  ret void
+}
+
+; [ ] S  -> N
+; [x] S  -> S
+; [ ] S  -> SC
+; [ ] S  -> N + B
+; [ ] S  -> SC + B
+define void @streaming_caller_streaming_callee_inline() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: @streaming_caller_streaming_callee_inline(
+; CHECK: call void @inlined_body()
+entry:
+  call void @streaming_callee()
+  ret void
+}
+
+; [ ] S  -> N
+; [ ] S  -> S
+; [x] S  -> SC
+; [ ] S  -> N + B
+; [ ] S  -> SC + B
+define void @streaming_caller_streaming_compatible_callee_inline() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: @streaming_caller_streaming_compatible_callee_inline(
+; CHECK: call void @inlined_body()
+entry:
+  call void @streaming_compatible_callee()
+  ret void
+}
+
+; [ ] S  -> N
+; [ ] S  -> S
+; [ ] S  -> SC
+; [x] S  -> N + B
+; [ ] S  -> SC + B
+define void @streaming_caller_locally_streaming_callee_inline() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: @streaming_caller_locally_streaming_callee_inline(
+; CHECK: call void @inlined_body()
+entry:
+  call void @locally_streaming_callee()
+  ret void
+}
+
+; [ ] S  -> N
+; [ ] S  -> S
+; [ ] S  -> SC
+; [ ] S  -> N + B
+; [x] S  -> SC + B
+define void @streaming_caller_streaming_compatible_locally_streaming_callee_inline() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: @streaming_caller_streaming_compatible_locally_streaming_callee_inline(
+; CHECK: call void @inlined_body()
+entry:
+  call void @streaming_compatible_locally_streaming_callee()
+  ret void
+}
+
+; [x] N + B -> N
+; [ ] N + B -> S
+; [ ] N + B -> SC
+; [ ] N + B -> N + B
+; [ ] N + B -> SC + B
+define void @locally_streaming_caller_normal_callee_dont_inline() "aarch64_pstate_sm_body" {
+; CHECK-LABEL: @locally_streaming_caller_normal_callee_dont_inline(
+; CHECK: call void @normal_callee()
+entry:
+  call void @normal_callee()
+  ret void
+}
+
+; [ ] N + B -> N
+; [x] N + B -> S
+; [ ] N + B -> SC
+; [ ] N + B -> N + B
+; [ ] N + B -> SC + B
+define void @locally_streaming_caller_streaming_callee_inline() "aarch64_pstate_sm_body" {
+; CHECK-LABEL: @locally_streaming_caller_streaming_callee_inline(
+; CHECK: call void @inlined_body()
+entry:
+  call void @streaming_callee()
+  ret void
+}
+
+; [ ] N + B -> N
+; [ ] N + B -> S
+; [x] N + B -> SC
+; [ ] N + B -> N + B
+; [ ] N + B -> SC + B
+define void @locally_streaming_caller_streaming_compatible_callee_inline() "aarch64_pstate_sm_body" {
+; CHECK-LABEL: @locally_streaming_caller_streaming_compatible_callee_inline(
+; CHECK: call void @inlined_body()
+entry:
+  call void @streaming_compatible_callee()
+  ret void
+}
+
+; [ ] N + B -> N
+; [ ] N + B -> S
+; [ ] N + B -> SC
+; [x] N + B -> N + B
+; [ ] N + B -> SC + B
+define void @locally_streaming_caller_locally_streaming_callee_inline() "aarch64_pstate_sm_body" {
+; CHECK-LABEL: @locally_streaming_caller_locally_streaming_callee_inline(
+; CHECK: call void @inlined_body()
+entry:
+  call void @locally_streaming_callee()
+  ret void
+}
+
+; [ ] N + B -> N
+; [ ] N + B -> S
+; [ ] N + B -> SC
+; [ ] N + B -> N + B
+; [x] N + B -> SC + B
+define void @locally_streaming_caller_streaming_compatible_locally_streaming_callee_inline() "aarch64_pstate_sm_body" {
+; CHECK-LABEL: @locally_streaming_caller_streaming_compatible_locally_streaming_callee_inline(
+; CHECK: call void @inlined_body()
+entry:
+  call void @streaming_compatible_locally_streaming_callee()
+  ret void
+}
+
+; [x] SC -> N
+; [ ] SC -> S
+; [ ] SC -> SC
+; [ ] SC -> N + B
+; [ ] SC -> SC + B
+define void @streaming_compatible_caller_normal_callee_dont_inline() "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: @streaming_compatible_caller_normal_callee_dont_inline(
+; CHECK: call void @normal_callee()
+entry:
+  call void @normal_callee()
+  ret void
+}
+
+; [ ] SC -> N
+; [x] SC -> S
+; [ ] SC -> SC
+; [ ] SC -> N + B
+; [ ] SC -> SC + B
+define void @streaming_compatible_caller_streaming_callee_dont_inline() "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: @streaming_compatible_caller_streaming_callee_dont_inline(
+; CHECK: call void @streaming_callee()
+entry:
+  call void @streaming_callee()
+  ret void
+}
+
+; [ ] SC -> N
+; [ ] SC -> S
+; [x] SC -> SC
+; [ ] SC -> N + B
+; [ ] SC -> SC + B
+define void @streaming_compatible_caller_streaming_compatible_callee_inline() "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: @streaming_compatible_caller_streaming_compatible_callee_inline(
+; CHECK: call void @inlined_body()
+entry:
+  call void @streaming_compatible_callee()
+  ret void
+}
+
+; [ ] SC -> N
+; [ ] SC -> S
+; [ ] SC -> SC
+; [x] SC -> N + B
+; [ ] SC -> SC + B
+define void @streaming_compatible_caller_locally_streaming_callee_dont_inline() "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: @streaming_compatible_caller_locally_streaming_callee_dont_inline(
+; CHECK: call void @locally_streaming_callee()
+entry:
+  call void @locally_streaming_callee()
+  ret void
+}
+
+; [ ] SC -> N
+; [ ] SC -> S
+; [ ] SC -> SC
+; [ ] SC -> N + B
+; [x] SC -> SC + B
+define void @streaming_compatible_caller_streaming_compatible_locally_streaming_callee_dont_inline() "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: @streaming_compatible_caller_streaming_compatible_locally_streaming_callee_dont_inline(
+; CHECK: call void @streaming_compatible_locally_streaming_callee()
+entry:
+  call void @streaming_compatible_locally_streaming_callee()
+  ret void
+}
+; [x] SC + B -> N
+; [ ] SC + B -> S
+; [ ] SC + B -> SC
+; [ ] SC + B -> N + B
+; [ ] SC + B -> SC + B
+define void @streaming_compatible_locally_streaming_caller_normal_callee_dont_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+; CHECK-LABEL: @streaming_compatible_locally_streaming_caller_normal_callee_dont_inline(
+; CHECK: call void @normal_callee()
+entry:
+  call void @normal_callee()
+  ret void
+}
+
+; [ ] SC + B -> N
+; [x] SC + B -> S
+; [ ] SC + B -> SC
+; [ ] SC + B -> N + B
+; [ ] SC + B -> SC + B
+define void @streaming_compatible_locally_streaming_caller_streaming_callee_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+; CHECK-LABEL: @streaming_compatible_locally_streaming_caller_streaming_callee_inline(
+; CHECK: call void @inlined_body()
+entry:
+  call void @streaming_callee()
+  ret void
+}
+
+; [ ] SC + B -> N
+; [ ] SC + B -> S
+; [x] SC + B -> SC
+; [ ] SC + B -> N + B
+; [ ] SC + B -> SC + B
+define void @streaming_compatible_locally_streaming_caller_streaming_compatible_callee_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+; CHECK-LABEL: @streaming_compatible_locally_streaming_caller_streaming_compatible_callee_inline(
+; CHECK: call void @inlined_body()
+entry:
+  call void @streaming_compatible_callee()
+  ret void
+}
+
+; [ ] SC + B -> N
+; [ ] SC + B -> S
+; [ ] SC + B -> SC
+; [x] SC + B -> N + B
+; [ ] SC + B -> SC + B
+define void @streaming_compatible_locally_streaming_caller_locally_streaming_callee_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+; CHECK-LABEL: @streaming_compatible_locally_streaming_caller_locally_streaming_callee_inline(
+; CHECK: call void @inlined_body()
+entry:
+  call void @locally_streaming_callee()
+  ret void
+}
+
+; [ ] SC + B -> N
+; [ ] SC + B -> S
+; [ ] SC + B -> SC
+; [ ] SC + B -> N + B
+; [x] SC + B -> SC + B
+define void @streaming_compatible_locally_streaming_caller_and_callee_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+; CHECK-LABEL: @streaming_compatible_locally_streaming_caller_and_callee_inline(
+; CHECK: call void @inlined_body()
+entry:
+  call void @streaming_compatible_locally_streaming_callee()
+  ret void
+}

diff  --git a/llvm/test/Transforms/Inline/AArch64/sme-pstateza-attrs.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstateza-attrs.ll
new file mode 100644
index 0000000000000..9dcfaf80019c9
--- /dev/null
+++ b/llvm/test/Transforms/Inline/AArch64/sme-pstateza-attrs.ll
@@ -0,0 +1,122 @@
+; RUN: opt -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -inline < %s | FileCheck %s
+
+declare void @inlined_body()
+
+;
+; Define some functions that will be called by the functions below.
+; These just call a '...body()' function. If we see the call to one of
+; these functions being replaced by '...body()', then we know it has been
+; inlined.
+;
+
+define void @nonza_callee() {
+entry:
+  call void @inlined_body()
+  ret void
+}
+
+define void @shared_za_callee() "aarch64_pstate_za_shared" {
+entry:
+  call void @inlined_body()
+  ret void
+}
+
+define void @new_za_callee() "aarch64_pstate_za_new" {
+  call void @inlined_body()
+  ret void
+}
+
+;
+; Now test that inlining only happens when no lazy-save is needed.
+; Test for a number of combinations, where:
+; N   Not using ZA.
+; S   Shared ZA interface
+; Z   New ZA interface
+
+; [x] N -> N
+; [ ] N -> S (This combination is invalid)
+; [ ] N -> Z
+define void @nonza_caller_nonza_callee_inline() {
+; CHECK-LABEL: @nonza_caller_nonza_callee_inline(
+; CHECK: call void @inlined_body()
+entry:
+  call void @nonza_callee()
+  ret void
+}
+
+; [ ] N -> N
+; [ ] N -> S (This combination is invalid)
+; [x] N -> Z
+define void @nonza_caller_new_za_callee_dont_inline() {
+; CHECK-LABEL: @nonza_caller_new_za_callee_dont_inline(
+; CHECK: call void @new_za_callee()
+entry:
+  call void @new_za_callee()
+  ret void
+}
+
+; [x] Z -> N
+; [ ] Z -> S
+; [ ] Z -> Z
+define void @new_za_caller_nonza_callee_dont_inline() "aarch64_pstate_za_new" {
+; CHECK-LABEL: @new_za_caller_nonza_callee_dont_inline(
+; CHECK: call void @nonza_callee()
+entry:
+  call void @nonza_callee()
+  ret void
+}
+
+; [ ] Z -> N
+; [x] Z -> S
+; [ ] Z -> Z
+define void @new_za_caller_shared_za_callee_inline() "aarch64_pstate_za_new" {
+; CHECK-LABEL: @new_za_caller_shared_za_callee_inline(
+; CHECK: call void @inlined_body()
+entry:
+  call void @shared_za_callee()
+  ret void
+}
+
+; [ ] Z -> N
+; [ ] Z -> S
+; [x] Z -> Z
+define void @new_za_caller_new_za_callee_dont_inline() "aarch64_pstate_za_new" {
+; CHECK-LABEL: @new_za_caller_new_za_callee_dont_inline(
+; CHECK: call void @new_za_callee()
+entry:
+  call void @new_za_callee()
+  ret void
+}
+
+; [x] S -> N
+; [ ] S -> Z
+; [ ] S -> S
+define void @shared_za_caller_nonza_callee_dont_inline() "aarch64_pstate_za_shared" {
+; CHECK-LABEL: @shared_za_caller_nonza_callee_dont_inline(
+; CHECK: call void @nonza_callee()
+entry:
+  call void @nonza_callee()
+  ret void
+}
+
+; [ ] S -> N
+; [x] S -> Z
+; [ ] S -> S
+define void @shared_za_caller_new_za_callee_dont_inline() "aarch64_pstate_za_shared" {
+; CHECK-LABEL: @shared_za_caller_new_za_callee_dont_inline(
+; CHECK: call void @new_za_callee()
+entry:
+  call void @new_za_callee()
+  ret void
+}
+
+; [ ] S -> N
+; [ ] S -> Z
+; [x] S -> S
+define void @shared_za_caller_shared_za_callee_inline() "aarch64_pstate_za_shared" {
+; CHECK-LABEL: @shared_za_caller_shared_za_callee_inline(
+; CHECK: call void @inlined_body()
+entry:
+  call void @shared_za_callee()
+  ret void
+}


        


More information about the llvm-commits mailing list