[llvm] f0f474d - [AArch64][SME] Add codegen pass to handle ZA state in arm_new_za functions.

David Sherwood via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 5 02:44:36 PDT 2022


Author: David Sherwood
Date: 2022-10-05T09:43:57Z
New Revision: f0f474dfd03b6131e04ce23a63b070c598a14473

URL: https://github.com/llvm/llvm-project/commit/f0f474dfd03b6131e04ce23a63b070c598a14473
DIFF: https://github.com/llvm/llvm-project/commit/f0f474dfd03b6131e04ce23a63b070c598a14473.diff

LOG: [AArch64][SME] Add codegen pass to handle ZA state in arm_new_za functions.

The new pass implements the following:

* Inserts code at the start of an arm_new_za function to
    commit a lazy-save when the lazy-save mechanism is active.
* Adds a smstart intrinsic at the start of the function.
* Adds a smstop intrinsic at the end of the function.

Patch co-authored by kmclaughlin.

Differential Revision: https://reviews.llvm.org/D133896

Added: 
    llvm/lib/Target/AArch64/SMEABIPass.cpp
    llvm/test/CodeGen/AArch64/sme-new-za-function.ll

Modified: 
    llvm/docs/AArch64SME.rst
    llvm/lib/Target/AArch64/AArch64.h
    llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
    llvm/lib/Target/AArch64/CMakeLists.txt
    llvm/test/CodeGen/AArch64/O0-pipeline.ll
    llvm/test/CodeGen/AArch64/O3-pipeline.ll

Removed: 
    


################################################################################
diff  --git a/llvm/docs/AArch64SME.rst b/llvm/docs/AArch64SME.rst
index 4585bb96664f9..155a7149fd2c4 100644
--- a/llvm/docs/AArch64SME.rst
+++ b/llvm/docs/AArch64SME.rst
@@ -40,6 +40,9 @@ level ACLE attributes:
 ``aarch64_pstate_za_preserved``
   is used for functions with ``__attribute__((arm_preserves_za))``
 
+``aarch64_expanded_pstate_za``
+  is used for functions with ``__attribute__((arm_new_za))``
+
 Clang must ensure that the above attributes are added both to the
 function's declaration/definition as well as to their call-sites. This is
 important for calls to attributed function pointers, where there is no
@@ -423,8 +426,10 @@ to toggle PSTATE.ZA using intrinsics. This also makes it simpler to setup a
 lazy-save mechanism for calls to private-ZA functions (i.e. functions that may
 either directly or indirectly clobber ZA state).
 
-For this purpose, we'll introduce a new LLVM IR pass that is run just before
-SelectionDAG.
+For the purpose of handling functions marked with ``aarch64_pstate_za_new``,
+we have introduced a new LLVM IR pass (SMEABIPass) that is run just before
+SelectionDAG. Any such functions dealt with by this pass are marked with
+``aarch64_expanded_pstate_za``.
 
 Setting up a lazy-save
 ----------------------

diff  --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 476da087a70a4..87fe96d2bfd34 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -58,6 +58,7 @@ FunctionPass *createAArch64MIPeepholeOptPass();
 FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
 
 FunctionPass *createAArch64CollectLOHPass();
+FunctionPass *createSMEABIPass();
 ModulePass *createSVEIntrinsicOptsPass();
 InstructionSelector *
 createAArch64InstructionSelector(const AArch64TargetMachine &,
@@ -100,6 +101,7 @@ void initializeAArch64StorePairSuppressPass(PassRegistry&);
 void initializeFalkorHWPFFixPass(PassRegistry&);
 void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&);
 void initializeLDTLSCleanupPass(PassRegistry&);
+void initializeSMEABIPass(PassRegistry &);
 void initializeSVEIntrinsicOptsPass(PassRegistry&);
 void initializeAArch64StackTaggingPass(PassRegistry&);
 void initializeAArch64StackTaggingPreRAPass(PassRegistry&);

diff  --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index c48643aaaca03..a6f81d8f2cd78 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -224,6 +224,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
   initializeFalkorHWPFFixPass(*PR);
   initializeFalkorMarkStridedAccessesLegacyPass(*PR);
   initializeLDTLSCleanupPass(*PR);
+  initializeSMEABIPass(*PR);
   initializeSVEIntrinsicOptsPass(*PR);
   initializeAArch64SpeculationHardeningPass(*PR);
   initializeAArch64SLSHardeningPass(*PR);
@@ -588,6 +589,11 @@ void AArch64PassConfig::addIRPasses() {
     addPass(createInterleavedAccessPass());
   }
 
+  // Expand any functions marked with SME attributes which require special
+  // changes for the calling convention or that require the lazy-saving
+  // mechanism specified in the SME ABI.
+  addPass(createSMEABIPass());
+
   // Add Control Flow Guard checks.
   if (TM->getTargetTriple().isOSWindows())
     addPass(createCFGuardCheckPass());

diff  --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 898bf1aa57fca..69f891bcb901a 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -83,6 +83,7 @@ add_llvm_target(AArch64CodeGen
   AArch64TargetMachine.cpp
   AArch64TargetObjectFile.cpp
   AArch64TargetTransformInfo.cpp
+  SMEABIPass.cpp
   SVEIntrinsicOpts.cpp
   AArch64SIMDInstrOpt.cpp
 

diff  --git a/llvm/lib/Target/AArch64/SMEABIPass.cpp b/llvm/lib/Target/AArch64/SMEABIPass.cpp
new file mode 100644
index 0000000000000..f209e8f6f6f93
--- /dev/null
+++ b/llvm/lib/Target/AArch64/SMEABIPass.cpp
@@ -0,0 +1,144 @@
+//===--------- SMEABI - SME  ABI-------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements parts of the SME ABI, such as:
+// * Using the lazy-save mechanism before enabling the use of ZA.
+// * Setting up the lazy-save mechanism around invokes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "Utils/AArch64SMEAttributes.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-sme-abi"
+
+namespace {
+struct SMEABI : public FunctionPass {
+  static char ID; // Pass identification, replacement for typeid
+  SMEABI() : FunctionPass(ID) {
+    initializeSMEABIPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+  bool updateNewZAFunctions(Module *M, Function *F, IRBuilder<> &Builder);
+};
+} // end anonymous namespace
+
+void SMEABI::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); }
+
+char SMEABI::ID = 0;
+static const char *name = "SME ABI Pass";
+INITIALIZE_PASS_BEGIN(SMEABI, DEBUG_TYPE, name, false, false)
+INITIALIZE_PASS_END(SMEABI, DEBUG_TYPE, name, false, false)
+
+FunctionPass *llvm::createSMEABIPass() { return new SMEABI(); }
+
+//===----------------------------------------------------------------------===//
+// Utility functions
+//===----------------------------------------------------------------------===//
+
+// Utility function to emit a call to __arm_tpidr2_save and clear TPIDR2_EL0.
+void emitTPIDR2Save(Module *M, IRBuilder<> &Builder) {
+  auto *TPIDR2SaveTy =
+      FunctionType::get(Builder.getVoidTy(), {}, /*IsVarArgs=*/false);
+
+  auto Attrs =
+      AttributeList::get(M->getContext(), 0, {"aarch64_pstate_sm_compatible"});
+  FunctionCallee Callee =
+      M->getOrInsertFunction("__arm_tpidr2_save", TPIDR2SaveTy, Attrs);
+  Builder.CreateCall(Callee);
+
+  // A save to TPIDR2 should be followed by clearing TPIDR2_EL0.
+  Function *WriteIntr =
+      Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_set_tpidr2);
+  Builder.CreateCall(WriteIntr->getFunctionType(), WriteIntr,
+                     Builder.getInt64(0));
+}
+
+/// This function generates code to commit a lazy save at the beginning of a
+/// function marked with `aarch64_pstate_za_new`. If the value read from
+/// TPIDR2_EL0 is not null on entry to the function then the lazy-saving scheme
+/// is active and we should call __arm_tpidr2_save to commit the lazy save.
+/// Additionally, PSTATE.ZA should be enabled at the beginning of the function
+/// and disabled before returning.
+bool SMEABI::updateNewZAFunctions(Module *M, Function *F,
+                                  IRBuilder<> &Builder) {
+  LLVMContext &Context = F->getContext();
+  BasicBlock *OrigBB = &F->getEntryBlock();
+
+  // Create the new blocks for reading TPIDR2_EL0 & enabling ZA state.
+  auto *SaveBB = OrigBB->splitBasicBlock(OrigBB->begin(), "save.za", true);
+  auto *PreludeBB = BasicBlock::Create(Context, "prelude", F, SaveBB);
+
+  // Read TPIDR2_EL0 in PreludeBB & branch to SaveBB if not 0.
+  Builder.SetInsertPoint(PreludeBB);
+  Function *TPIDR2Intr =
+      Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_get_tpidr2);
+  auto *TPIDR2 = Builder.CreateCall(TPIDR2Intr->getFunctionType(), TPIDR2Intr,
+                                    {}, "tpidr2");
+  auto *Cmp =
+      Builder.CreateCmp(ICmpInst::ICMP_NE, TPIDR2, Builder.getInt64(0), "cmp");
+  Builder.CreateCondBr(Cmp, SaveBB, OrigBB);
+
+  // Create a call to __arm_tpidr2_save, which commits the lazy save.
+  Builder.SetInsertPoint(&SaveBB->back());
+  emitTPIDR2Save(M, Builder);
+
+  // Enable pstate.za at the start of the function.
+  Builder.SetInsertPoint(&OrigBB->front());
+  Function *EnableZAIntr =
+      Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_enable);
+  Builder.CreateCall(EnableZAIntr->getFunctionType(), EnableZAIntr);
+
+  // Before returning, disable pstate.za
+  for (BasicBlock &BB : F->getBasicBlockList()) {
+    Instruction *T = BB.getTerminator();
+    if (!T || !isa<ReturnInst>(T))
+      continue;
+    Builder.SetInsertPoint(T);
+    Function *DisableZAIntr =
+        Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_disable);
+    Builder.CreateCall(DisableZAIntr->getFunctionType(), DisableZAIntr);
+  }
+
+  F->addFnAttr("aarch64_expanded_pstate_za");
+  return true;
+}
+
+bool SMEABI::runOnFunction(Function &F) {
+  Module *M = F.getParent();
+  LLVMContext &Context = F.getContext();
+  IRBuilder<> Builder(Context);
+
+  if (F.isDeclaration() || F.hasFnAttribute("aarch64_expanded_pstate_za"))
+    return false;
+
+  bool Changed = false;
+  SMEAttrs FnAttrs(F);
+  if (FnAttrs.hasNewZAInterface())
+    Changed |= updateNewZAFunctions(M, &F, Builder);
+
+  return Changed;
+}

diff  --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
index 3c42d1adecd31..64661fc2f1c6a 100644
--- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -26,6 +26,7 @@
 ; CHECK-NEXT:       Scalarize Masked Memory Intrinsics
 ; CHECK-NEXT:       Expand reduction intrinsics
 ; CHECK-NEXT:       AArch64 Stack Tagging
+; CHECK-NEXT:       SME ABI Pass
 ; CHECK-NEXT:       Exception handling preparation
 ; CHECK-NEXT:       Safe Stack instrumentation pass
 ; CHECK-NEXT:       Insert stack protectors

diff  --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 6fee707e8262f..256e2c8cae7fd 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -92,6 +92,7 @@
 ; CHECK-NEXT:       Interleaved Load Combine Pass
 ; CHECK-NEXT:       Dominator Tree Construction
 ; CHECK-NEXT:       Interleaved Access Pass
+; CHECK-NEXT:       SME ABI Pass
 ; CHECK-NEXT:       Natural Loop Information
 ; CHECK-NEXT:       Type Promotion
 ; CHECK-NEXT:       CodeGen Prepare

diff  --git a/llvm/test/CodeGen/AArch64/sme-new-za-function.ll b/llvm/test/CodeGen/AArch64/sme-new-za-function.ll
new file mode 100644
index 0000000000000..392f590bb2408
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-new-za-function.ll
@@ -0,0 +1,62 @@
+; RUN: opt -S -mtriple=aarch64-linux-gnu -aarch64-sme-abi %s | FileCheck %s
+; RUN: opt -S -mtriple=aarch64-linux-gnu -aarch64-sme-abi -aarch64-sme-abi %s | FileCheck %s
+
+declare void @shared_za_callee() "aarch64_pstate_za_shared"
+
+define void @private_za() "aarch64_pstate_za_new" {
+; CHECK-LABEL: @private_za(
+; CHECK-NEXT:  prelude:
+; CHECK-NEXT:    [[TPIDR2:%.*]] = call i64 @llvm.aarch64.sme.get.tpidr2()
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i64 [[TPIDR2]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[SAVE_ZA:%.*]], label [[TMP0:%.*]]
+; CHECK:       save.za:
+; CHECK-NEXT:    call void @__arm_tpidr2_save()
+; CHECK-NEXT:    call void @llvm.aarch64.sme.set.tpidr2(i64 0)
+; CHECK-NEXT:    br label [[TMP0]]
+; CHECK:       0:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.za.enable()
+; CHECK-NEXT:    call void @shared_za_callee()
+; CHECK-NEXT:    call void @llvm.aarch64.sme.za.disable()
+; CHECK-NEXT:    ret void
+;
+  call void @shared_za_callee()
+  ret void
+}
+
+define i32 @private_za_multiple_exit(i32 %a, i32 %b, i64 %cond) "aarch64_pstate_za_new" {
+; CHECK-LABEL: @private_za_multiple_exit(
+; CHECK-NEXT:  prelude:
+; CHECK-NEXT:    [[TPIDR2:%.*]] = call i64 @llvm.aarch64.sme.get.tpidr2()
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i64 [[TPIDR2]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[SAVE_ZA:%.*]], label [[ENTRY:%.*]]
+; CHECK:       save.za:
+; CHECK-NEXT:    call void @__arm_tpidr2_save()
+; CHECK-NEXT:    call void @llvm.aarch64.sme.set.tpidr2(i64 0)
+; CHECK-NEXT:    br label [[ENTRY]]
+; CHECK:       entry:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.za.enable()
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[COND:%.*]], 1
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_ELSE:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    call void @llvm.aarch64.sme.za.disable()
+; CHECK-NEXT:    ret i32 [[ADD]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[A]], [[B]]
+; CHECK-NEXT:    call void @llvm.aarch64.sme.za.disable()
+; CHECK-NEXT:    ret i32 [[SUB]]
+;
+entry:
+  %tobool = icmp eq i64 %cond, 1
+  br i1 %tobool, label %if.else, label %if.end
+
+if.else:
+  %add = add i32 %a, %b
+  ret i32 %add
+
+if.end:
+  %sub = sub i32 %a, %b
+  ret i32 %sub
+}
+
+; CHECK: declare "aarch64_pstate_sm_compatible" void @__arm_tpidr2_save()


        


More information about the llvm-commits mailing list