[llvm] 0133586 - [AArch64][SME] Add the zero intrinsic

David Sherwood via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 20 06:28:07 PDT 2022


Author: David Sherwood
Date: 2022-06-20T14:27:59+01:00
New Revision: 013358632e657f3138f055313ef7b51cbafe06ce

URL: https://github.com/llvm/llvm-project/commit/013358632e657f3138f055313ef7b51cbafe06ce
DIFF: https://github.com/llvm/llvm-project/commit/013358632e657f3138f055313ef7b51cbafe06ce.diff

LOG: [AArch64][SME] Add the zero intrinsic

The SME zero instruction takes a mask as an input declaring which
64-bit element tiles should be zeroed. There is a 1:1 mapping
between the zero intrinsic and the instruction; however, we also
want to make the register allocator aware that some tile
registers are being written to.

To do this, the intrinsic is selected to a pseudo instruction whose
custom inserter emits the real instruction and marks every tile
register named in the mask as implicitly defined by the operation.

Differential Revision: https://reviews.llvm.org/D127843

Added: 
    llvm/test/CodeGen/AArch64/sme-intrinsics-zero.ll

Modified: 
    llvm/include/llvm/IR/IntrinsicsAArch64.td
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.h
    llvm/lib/Target/AArch64/SMEInstrFormats.td

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index b67b47c755dcb..a92993df5ee62 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2650,6 +2650,7 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sme_writeq_horiz : SME_VectorToTile_Intrinsic;
   def int_aarch64_sme_writeq_vert  : SME_VectorToTile_Intrinsic;
 
+  def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i64_ty]>;
 
   //
   // Counting elements

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 51f53862d24cf..4ed7c0905d742 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2376,6 +2376,23 @@ AArch64TargetLowering::EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg,
   return BB;
 }
 
+MachineBasicBlock *
+AArch64TargetLowering::EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const {
+  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  MachineInstrBuilder MIB =
+      BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::ZERO_M));
+  MIB.add(MI.getOperand(0)); // Mask
+
+  unsigned Mask = MI.getOperand(0).getImm();
+  for (unsigned I = 0; I < 8; I++) {
+    if (Mask & (1 << I))
+      MIB.addDef(AArch64::ZAD0 + I, RegState::ImplicitDefine);
+  }
+
+  MI.eraseFromParent(); // The pseudo is gone now.
+  return BB;
+}
+
 MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
     MachineInstr &MI, MachineBasicBlock *BB) const {
   switch (MI.getOpcode()) {
@@ -2458,6 +2475,8 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
   case AArch64::INSERT_MXIPZ_V_PSEUDO_Q:
     return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_Q, AArch64::ZAQ0, MI,
                                   BB);
+  case AArch64::ZERO_M_PSEUDO:
+    return EmitZero(MI, BB);
   }
 }
 

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index b26871c7ecd61..746732c865e8c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -567,6 +567,7 @@ class AArch64TargetLowering : public TargetLowering {
   MachineBasicBlock *EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg,
                                             MachineInstr &MI,
                                             MachineBasicBlock *BB) const;
+  MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
 
   MachineBasicBlock *
   EmitInstrWithCustomInserter(MachineInstr &MI,

diff  --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 4e8be604b5525..2834792e49afc 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -947,8 +947,11 @@ multiclass sme_tile_to_vector<string mnemonic> {
 // SME Zero
 //===----------------------------------------------------------------------===//
 
+// NOTE: This definition isn't really correct because there are outputs, i.e.
+// the tile registers being zeroed. We fix this up in a custom inserter that
+// marks the appropriate registers as being implicitly defined.
 class sme_zero_inst<string mnemonic>
-    : I<(outs MatrixTileList:$imm), (ins),
+    : I<(outs), (ins MatrixTileList:$imm),
         mnemonic, "\t$imm", "", []>, Sched<[]> {
   bits<8> imm;
   let Inst{31-8} = 0b110000000000100000000000;
@@ -973,6 +976,15 @@ multiclass sme_zero<string mnemonic> {
   def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10111011), 1>;
   def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11011101), 1>;
   def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11101110), 1>;
+
+  def NAME # _PSEUDO : Pseudo<(outs), (ins i64imm:$tilelist), []>,
+      Sched<[]> {
+    // Translated to the actual instructions in AArch64ISelLowering.cpp
+    let usesCustomInserter = 1;
+  }
+
+  def : Pat<(int_aarch64_sme_zero imm:$imm),
+            (!cast<Instruction>(NAME # _PSEUDO) imm:$imm)>;
 }
 
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-zero.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-zero.ll
new file mode 100644
index 0000000000000..769ae62060182
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-zero.ll
@@ -0,0 +1,524 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
+
+
+define void @zero() {
+; CHECK-LABEL: zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zero {}
+; CHECK-NEXT:    zero {za0.d}
+; CHECK-NEXT:    zero {za1.d}
+; CHECK-NEXT:    zero {za0.d, za1.d}
+; CHECK-NEXT:    zero {za2.d}
+; CHECK-NEXT:    zero {za0.d, za2.d}
+; CHECK-NEXT:    zero {za1.d, za2.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d}
+; CHECK-NEXT:    zero {za3.d}
+; CHECK-NEXT:    zero {za0.d, za3.d}
+; CHECK-NEXT:    zero {za1.d, za3.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za3.d}
+; CHECK-NEXT:    zero {za2.d, za3.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za3.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za3.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za3.d}
+; CHECK-NEXT:    zero {za4.d}
+; CHECK-NEXT:    zero {za0.s}
+; CHECK-NEXT:    zero {za1.d, za4.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za4.d}
+; CHECK-NEXT:    zero {za2.d, za4.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za4.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za4.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za4.d}
+; CHECK-NEXT:    zero {za3.d, za4.d}
+; CHECK-NEXT:    zero {za0.d, za3.d, za4.d}
+; CHECK-NEXT:    zero {za1.d, za3.d, za4.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za3.d, za4.d}
+; CHECK-NEXT:    zero {za2.d, za3.d, za4.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za3.d, za4.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za3.d, za4.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za3.d, za4.d}
+; CHECK-NEXT:    zero {za5.d}
+; CHECK-NEXT:    zero {za0.d, za5.d}
+; CHECK-NEXT:    zero {za1.s}
+; CHECK-NEXT:    zero {za0.d, za1.d, za5.d}
+; CHECK-NEXT:    zero {za2.d, za5.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za5.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za5.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za5.d}
+; CHECK-NEXT:    zero {za3.d, za5.d}
+; CHECK-NEXT:    zero {za0.d, za3.d, za5.d}
+; CHECK-NEXT:    zero {za1.d, za3.d, za5.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za3.d, za5.d}
+; CHECK-NEXT:    zero {za2.d, za3.d, za5.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za3.d, za5.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za3.d, za5.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za3.d, za5.d}
+; CHECK-NEXT:    zero {za4.d, za5.d}
+; CHECK-NEXT:    zero {za0.d, za4.d, za5.d}
+; CHECK-NEXT:    zero {za1.d, za4.d, za5.d}
+; CHECK-NEXT:    zero {za0.s,za1.s}
+; CHECK-NEXT:    zero {za2.d, za4.d, za5.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za4.d, za5.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za4.d, za5.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za4.d, za5.d}
+; CHECK-NEXT:    zero {za3.d, za4.d, za5.d}
+; CHECK-NEXT:    zero {za0.d, za3.d, za4.d, za5.d}
+; CHECK-NEXT:    zero {za1.d, za3.d, za4.d, za5.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za3.d, za4.d, za5.d}
+; CHECK-NEXT:    zero {za2.d, za3.d, za4.d, za5.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za3.d, za4.d, za5.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za3.d, za4.d, za5.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za3.d, za4.d, za5.d}
+; CHECK-NEXT:    zero {za6.d}
+; CHECK-NEXT:    zero {za0.d, za6.d}
+; CHECK-NEXT:    zero {za1.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za6.d}
+; CHECK-NEXT:    zero {za2.s}
+; CHECK-NEXT:    zero {za0.d, za2.d, za6.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za6.d}
+; CHECK-NEXT:    zero {za3.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za3.d, za6.d}
+; CHECK-NEXT:    zero {za1.d, za3.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za3.d, za6.d}
+; CHECK-NEXT:    zero {za2.d, za3.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za3.d, za6.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za3.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za3.d, za6.d}
+; CHECK-NEXT:    zero {za4.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za4.d, za6.d}
+; CHECK-NEXT:    zero {za1.d, za4.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za4.d, za6.d}
+; CHECK-NEXT:    zero {za2.d, za4.d, za6.d}
+; CHECK-NEXT:    zero {za0.h}
+; CHECK-NEXT:    zero {za1.d, za2.d, za4.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za4.d, za6.d}
+; CHECK-NEXT:    zero {za3.d, za4.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za3.d, za4.d, za6.d}
+; CHECK-NEXT:    zero {za1.d, za3.d, za4.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za3.d, za4.d, za6.d}
+; CHECK-NEXT:    zero {za2.d, za3.d, za4.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za3.d, za4.d, za6.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za3.d, za4.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za3.d, za4.d, za6.d}
+; CHECK-NEXT:    zero {za5.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za1.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za2.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za1.s,za2.s}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za3.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za3.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za1.d, za3.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za3.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za2.d, za3.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za3.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za3.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za3.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za4.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za4.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za1.d, za4.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za4.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za2.d, za4.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za4.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za4.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za0.s,za1.s,za2.s}
+; CHECK-NEXT:    zero {za3.d, za4.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za3.d, za4.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za1.d, za3.d, za4.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za3.d, za4.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za2.d, za3.d, za4.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za3.d, za4.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za3.d, za4.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za3.d, za4.d, za5.d, za6.d}
+; CHECK-NEXT:    zero {za7.d}
+; CHECK-NEXT:    zero {za0.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za7.d}
+; CHECK-NEXT:    zero {za2.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za7.d}
+; CHECK-NEXT:    zero {za3.s}
+; CHECK-NEXT:    zero {za0.d, za3.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za3.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za3.d, za7.d}
+; CHECK-NEXT:    zero {za2.d, za3.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za3.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za3.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za3.d, za7.d}
+; CHECK-NEXT:    zero {za4.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za4.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za4.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za4.d, za7.d}
+; CHECK-NEXT:    zero {za2.d, za4.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za4.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za4.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za4.d, za7.d}
+; CHECK-NEXT:    zero {za3.d, za4.d, za7.d}
+; CHECK-NEXT:    zero {za0.s,za3.s}
+; CHECK-NEXT:    zero {za1.d, za3.d, za4.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za3.d, za4.d, za7.d}
+; CHECK-NEXT:    zero {za2.d, za3.d, za4.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za3.d, za4.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za3.d, za4.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za3.d, za4.d, za7.d}
+; CHECK-NEXT:    zero {za5.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za2.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za3.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za3.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za1.h}
+; CHECK-NEXT:    zero {za0.d, za1.d, za3.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za2.d, za3.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za3.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za3.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za3.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za4.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za4.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za4.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za4.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za2.d, za4.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za4.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za4.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za4.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za3.d, za4.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za3.d, za4.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za3.d, za4.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za0.s,za1.s,za3.s}
+; CHECK-NEXT:    zero {za2.d, za3.d, za4.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za3.d, za4.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za3.d, za4.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za3.d, za4.d, za5.d, za7.d}
+; CHECK-NEXT:    zero {za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za2.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za3.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za3.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za3.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za3.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za2.s,za3.s}
+; CHECK-NEXT:    zero {za0.d, za2.d, za3.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za3.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za3.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za4.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za4.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za4.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za4.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za2.d, za4.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za4.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za4.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za4.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za3.d, za4.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za3.d, za4.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za3.d, za4.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za3.d, za4.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za2.d, za3.d, za4.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.s,za2.s,za3.s}
+; CHECK-NEXT:    zero {za1.d, za2.d, za3.d, za4.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za3.d, za4.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za2.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za3.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za3.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za3.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za3.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za2.d, za3.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za3.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za1.s,za2.s,za3.s}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za3.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za4.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za4.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za4.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za4.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za2.d, za4.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za4.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za4.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za2.d, za4.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za3.d, za4.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za3.d, za4.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za3.d, za4.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za1.d, za3.d, za4.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za2.d, za3.d, za4.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za0.d, za2.d, za3.d, za4.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za1.d, za2.d, za3.d, za4.d, za5.d, za6.d, za7.d}
+; CHECK-NEXT:    zero {za}
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.zero(i64 0)
+  call void @llvm.aarch64.sme.zero(i64 1)
+  call void @llvm.aarch64.sme.zero(i64 2)
+  call void @llvm.aarch64.sme.zero(i64 3)
+  call void @llvm.aarch64.sme.zero(i64 4)
+  call void @llvm.aarch64.sme.zero(i64 5)
+  call void @llvm.aarch64.sme.zero(i64 6)
+  call void @llvm.aarch64.sme.zero(i64 7)
+  call void @llvm.aarch64.sme.zero(i64 8)
+  call void @llvm.aarch64.sme.zero(i64 9)
+  call void @llvm.aarch64.sme.zero(i64 10)
+  call void @llvm.aarch64.sme.zero(i64 11)
+  call void @llvm.aarch64.sme.zero(i64 12)
+  call void @llvm.aarch64.sme.zero(i64 13)
+  call void @llvm.aarch64.sme.zero(i64 14)
+  call void @llvm.aarch64.sme.zero(i64 15)
+  call void @llvm.aarch64.sme.zero(i64 16)
+  call void @llvm.aarch64.sme.zero(i64 17)
+  call void @llvm.aarch64.sme.zero(i64 18)
+  call void @llvm.aarch64.sme.zero(i64 19)
+  call void @llvm.aarch64.sme.zero(i64 20)
+  call void @llvm.aarch64.sme.zero(i64 21)
+  call void @llvm.aarch64.sme.zero(i64 22)
+  call void @llvm.aarch64.sme.zero(i64 23)
+  call void @llvm.aarch64.sme.zero(i64 24)
+  call void @llvm.aarch64.sme.zero(i64 25)
+  call void @llvm.aarch64.sme.zero(i64 26)
+  call void @llvm.aarch64.sme.zero(i64 27)
+  call void @llvm.aarch64.sme.zero(i64 28)
+  call void @llvm.aarch64.sme.zero(i64 29)
+  call void @llvm.aarch64.sme.zero(i64 30)
+  call void @llvm.aarch64.sme.zero(i64 31)
+  call void @llvm.aarch64.sme.zero(i64 32)
+  call void @llvm.aarch64.sme.zero(i64 33)
+  call void @llvm.aarch64.sme.zero(i64 34)
+  call void @llvm.aarch64.sme.zero(i64 35)
+  call void @llvm.aarch64.sme.zero(i64 36)
+  call void @llvm.aarch64.sme.zero(i64 37)
+  call void @llvm.aarch64.sme.zero(i64 38)
+  call void @llvm.aarch64.sme.zero(i64 39)
+  call void @llvm.aarch64.sme.zero(i64 40)
+  call void @llvm.aarch64.sme.zero(i64 41)
+  call void @llvm.aarch64.sme.zero(i64 42)
+  call void @llvm.aarch64.sme.zero(i64 43)
+  call void @llvm.aarch64.sme.zero(i64 44)
+  call void @llvm.aarch64.sme.zero(i64 45)
+  call void @llvm.aarch64.sme.zero(i64 46)
+  call void @llvm.aarch64.sme.zero(i64 47)
+  call void @llvm.aarch64.sme.zero(i64 48)
+  call void @llvm.aarch64.sme.zero(i64 49)
+  call void @llvm.aarch64.sme.zero(i64 50)
+  call void @llvm.aarch64.sme.zero(i64 51)
+  call void @llvm.aarch64.sme.zero(i64 52)
+  call void @llvm.aarch64.sme.zero(i64 53)
+  call void @llvm.aarch64.sme.zero(i64 54)
+  call void @llvm.aarch64.sme.zero(i64 55)
+  call void @llvm.aarch64.sme.zero(i64 56)
+  call void @llvm.aarch64.sme.zero(i64 57)
+  call void @llvm.aarch64.sme.zero(i64 58)
+  call void @llvm.aarch64.sme.zero(i64 59)
+  call void @llvm.aarch64.sme.zero(i64 60)
+  call void @llvm.aarch64.sme.zero(i64 61)
+  call void @llvm.aarch64.sme.zero(i64 62)
+  call void @llvm.aarch64.sme.zero(i64 63)
+  call void @llvm.aarch64.sme.zero(i64 64)
+  call void @llvm.aarch64.sme.zero(i64 65)
+  call void @llvm.aarch64.sme.zero(i64 66)
+  call void @llvm.aarch64.sme.zero(i64 67)
+  call void @llvm.aarch64.sme.zero(i64 68)
+  call void @llvm.aarch64.sme.zero(i64 69)
+  call void @llvm.aarch64.sme.zero(i64 70)
+  call void @llvm.aarch64.sme.zero(i64 71)
+  call void @llvm.aarch64.sme.zero(i64 72)
+  call void @llvm.aarch64.sme.zero(i64 73)
+  call void @llvm.aarch64.sme.zero(i64 74)
+  call void @llvm.aarch64.sme.zero(i64 75)
+  call void @llvm.aarch64.sme.zero(i64 76)
+  call void @llvm.aarch64.sme.zero(i64 77)
+  call void @llvm.aarch64.sme.zero(i64 78)
+  call void @llvm.aarch64.sme.zero(i64 79)
+  call void @llvm.aarch64.sme.zero(i64 80)
+  call void @llvm.aarch64.sme.zero(i64 81)
+  call void @llvm.aarch64.sme.zero(i64 82)
+  call void @llvm.aarch64.sme.zero(i64 83)
+  call void @llvm.aarch64.sme.zero(i64 84)
+  call void @llvm.aarch64.sme.zero(i64 85)
+  call void @llvm.aarch64.sme.zero(i64 86)
+  call void @llvm.aarch64.sme.zero(i64 87)
+  call void @llvm.aarch64.sme.zero(i64 88)
+  call void @llvm.aarch64.sme.zero(i64 89)
+  call void @llvm.aarch64.sme.zero(i64 90)
+  call void @llvm.aarch64.sme.zero(i64 91)
+  call void @llvm.aarch64.sme.zero(i64 92)
+  call void @llvm.aarch64.sme.zero(i64 93)
+  call void @llvm.aarch64.sme.zero(i64 94)
+  call void @llvm.aarch64.sme.zero(i64 95)
+  call void @llvm.aarch64.sme.zero(i64 96)
+  call void @llvm.aarch64.sme.zero(i64 97)
+  call void @llvm.aarch64.sme.zero(i64 98)
+  call void @llvm.aarch64.sme.zero(i64 99)
+  call void @llvm.aarch64.sme.zero(i64 100)
+  call void @llvm.aarch64.sme.zero(i64 101)
+  call void @llvm.aarch64.sme.zero(i64 102)
+  call void @llvm.aarch64.sme.zero(i64 103)
+  call void @llvm.aarch64.sme.zero(i64 104)
+  call void @llvm.aarch64.sme.zero(i64 105)
+  call void @llvm.aarch64.sme.zero(i64 106)
+  call void @llvm.aarch64.sme.zero(i64 107)
+  call void @llvm.aarch64.sme.zero(i64 108)
+  call void @llvm.aarch64.sme.zero(i64 109)
+  call void @llvm.aarch64.sme.zero(i64 110)
+  call void @llvm.aarch64.sme.zero(i64 111)
+  call void @llvm.aarch64.sme.zero(i64 112)
+  call void @llvm.aarch64.sme.zero(i64 113)
+  call void @llvm.aarch64.sme.zero(i64 114)
+  call void @llvm.aarch64.sme.zero(i64 115)
+  call void @llvm.aarch64.sme.zero(i64 116)
+  call void @llvm.aarch64.sme.zero(i64 117)
+  call void @llvm.aarch64.sme.zero(i64 118)
+  call void @llvm.aarch64.sme.zero(i64 119)
+  call void @llvm.aarch64.sme.zero(i64 120)
+  call void @llvm.aarch64.sme.zero(i64 121)
+  call void @llvm.aarch64.sme.zero(i64 122)
+  call void @llvm.aarch64.sme.zero(i64 123)
+  call void @llvm.aarch64.sme.zero(i64 124)
+  call void @llvm.aarch64.sme.zero(i64 125)
+  call void @llvm.aarch64.sme.zero(i64 126)
+  call void @llvm.aarch64.sme.zero(i64 127)
+  call void @llvm.aarch64.sme.zero(i64 128)
+  call void @llvm.aarch64.sme.zero(i64 129)
+  call void @llvm.aarch64.sme.zero(i64 130)
+  call void @llvm.aarch64.sme.zero(i64 131)
+  call void @llvm.aarch64.sme.zero(i64 132)
+  call void @llvm.aarch64.sme.zero(i64 133)
+  call void @llvm.aarch64.sme.zero(i64 134)
+  call void @llvm.aarch64.sme.zero(i64 135)
+  call void @llvm.aarch64.sme.zero(i64 136)
+  call void @llvm.aarch64.sme.zero(i64 137)
+  call void @llvm.aarch64.sme.zero(i64 138)
+  call void @llvm.aarch64.sme.zero(i64 139)
+  call void @llvm.aarch64.sme.zero(i64 140)
+  call void @llvm.aarch64.sme.zero(i64 141)
+  call void @llvm.aarch64.sme.zero(i64 142)
+  call void @llvm.aarch64.sme.zero(i64 143)
+  call void @llvm.aarch64.sme.zero(i64 144)
+  call void @llvm.aarch64.sme.zero(i64 145)
+  call void @llvm.aarch64.sme.zero(i64 146)
+  call void @llvm.aarch64.sme.zero(i64 147)
+  call void @llvm.aarch64.sme.zero(i64 148)
+  call void @llvm.aarch64.sme.zero(i64 149)
+  call void @llvm.aarch64.sme.zero(i64 150)
+  call void @llvm.aarch64.sme.zero(i64 151)
+  call void @llvm.aarch64.sme.zero(i64 152)
+  call void @llvm.aarch64.sme.zero(i64 153)
+  call void @llvm.aarch64.sme.zero(i64 154)
+  call void @llvm.aarch64.sme.zero(i64 155)
+  call void @llvm.aarch64.sme.zero(i64 156)
+  call void @llvm.aarch64.sme.zero(i64 157)
+  call void @llvm.aarch64.sme.zero(i64 158)
+  call void @llvm.aarch64.sme.zero(i64 159)
+  call void @llvm.aarch64.sme.zero(i64 160)
+  call void @llvm.aarch64.sme.zero(i64 161)
+  call void @llvm.aarch64.sme.zero(i64 162)
+  call void @llvm.aarch64.sme.zero(i64 163)
+  call void @llvm.aarch64.sme.zero(i64 164)
+  call void @llvm.aarch64.sme.zero(i64 165)
+  call void @llvm.aarch64.sme.zero(i64 166)
+  call void @llvm.aarch64.sme.zero(i64 167)
+  call void @llvm.aarch64.sme.zero(i64 168)
+  call void @llvm.aarch64.sme.zero(i64 169)
+  call void @llvm.aarch64.sme.zero(i64 170)
+  call void @llvm.aarch64.sme.zero(i64 171)
+  call void @llvm.aarch64.sme.zero(i64 172)
+  call void @llvm.aarch64.sme.zero(i64 173)
+  call void @llvm.aarch64.sme.zero(i64 174)
+  call void @llvm.aarch64.sme.zero(i64 175)
+  call void @llvm.aarch64.sme.zero(i64 176)
+  call void @llvm.aarch64.sme.zero(i64 177)
+  call void @llvm.aarch64.sme.zero(i64 178)
+  call void @llvm.aarch64.sme.zero(i64 179)
+  call void @llvm.aarch64.sme.zero(i64 180)
+  call void @llvm.aarch64.sme.zero(i64 181)
+  call void @llvm.aarch64.sme.zero(i64 182)
+  call void @llvm.aarch64.sme.zero(i64 183)
+  call void @llvm.aarch64.sme.zero(i64 184)
+  call void @llvm.aarch64.sme.zero(i64 185)
+  call void @llvm.aarch64.sme.zero(i64 186)
+  call void @llvm.aarch64.sme.zero(i64 187)
+  call void @llvm.aarch64.sme.zero(i64 188)
+  call void @llvm.aarch64.sme.zero(i64 189)
+  call void @llvm.aarch64.sme.zero(i64 190)
+  call void @llvm.aarch64.sme.zero(i64 191)
+  call void @llvm.aarch64.sme.zero(i64 192)
+  call void @llvm.aarch64.sme.zero(i64 193)
+  call void @llvm.aarch64.sme.zero(i64 194)
+  call void @llvm.aarch64.sme.zero(i64 195)
+  call void @llvm.aarch64.sme.zero(i64 196)
+  call void @llvm.aarch64.sme.zero(i64 197)
+  call void @llvm.aarch64.sme.zero(i64 198)
+  call void @llvm.aarch64.sme.zero(i64 199)
+  call void @llvm.aarch64.sme.zero(i64 200)
+  call void @llvm.aarch64.sme.zero(i64 201)
+  call void @llvm.aarch64.sme.zero(i64 202)
+  call void @llvm.aarch64.sme.zero(i64 203)
+  call void @llvm.aarch64.sme.zero(i64 204)
+  call void @llvm.aarch64.sme.zero(i64 205)
+  call void @llvm.aarch64.sme.zero(i64 206)
+  call void @llvm.aarch64.sme.zero(i64 207)
+  call void @llvm.aarch64.sme.zero(i64 208)
+  call void @llvm.aarch64.sme.zero(i64 209)
+  call void @llvm.aarch64.sme.zero(i64 210)
+  call void @llvm.aarch64.sme.zero(i64 211)
+  call void @llvm.aarch64.sme.zero(i64 212)
+  call void @llvm.aarch64.sme.zero(i64 213)
+  call void @llvm.aarch64.sme.zero(i64 214)
+  call void @llvm.aarch64.sme.zero(i64 215)
+  call void @llvm.aarch64.sme.zero(i64 216)
+  call void @llvm.aarch64.sme.zero(i64 217)
+  call void @llvm.aarch64.sme.zero(i64 218)
+  call void @llvm.aarch64.sme.zero(i64 219)
+  call void @llvm.aarch64.sme.zero(i64 220)
+  call void @llvm.aarch64.sme.zero(i64 221)
+  call void @llvm.aarch64.sme.zero(i64 222)
+  call void @llvm.aarch64.sme.zero(i64 223)
+  call void @llvm.aarch64.sme.zero(i64 224)
+  call void @llvm.aarch64.sme.zero(i64 225)
+  call void @llvm.aarch64.sme.zero(i64 226)
+  call void @llvm.aarch64.sme.zero(i64 227)
+  call void @llvm.aarch64.sme.zero(i64 228)
+  call void @llvm.aarch64.sme.zero(i64 229)
+  call void @llvm.aarch64.sme.zero(i64 230)
+  call void @llvm.aarch64.sme.zero(i64 231)
+  call void @llvm.aarch64.sme.zero(i64 232)
+  call void @llvm.aarch64.sme.zero(i64 233)
+  call void @llvm.aarch64.sme.zero(i64 234)
+  call void @llvm.aarch64.sme.zero(i64 235)
+  call void @llvm.aarch64.sme.zero(i64 236)
+  call void @llvm.aarch64.sme.zero(i64 237)
+  call void @llvm.aarch64.sme.zero(i64 238)
+  call void @llvm.aarch64.sme.zero(i64 239)
+  call void @llvm.aarch64.sme.zero(i64 240)
+  call void @llvm.aarch64.sme.zero(i64 241)
+  call void @llvm.aarch64.sme.zero(i64 242)
+  call void @llvm.aarch64.sme.zero(i64 243)
+  call void @llvm.aarch64.sme.zero(i64 244)
+  call void @llvm.aarch64.sme.zero(i64 245)
+  call void @llvm.aarch64.sme.zero(i64 246)
+  call void @llvm.aarch64.sme.zero(i64 247)
+  call void @llvm.aarch64.sme.zero(i64 248)
+  call void @llvm.aarch64.sme.zero(i64 249)
+  call void @llvm.aarch64.sme.zero(i64 250)
+  call void @llvm.aarch64.sme.zero(i64 251)
+  call void @llvm.aarch64.sme.zero(i64 252)
+  call void @llvm.aarch64.sme.zero(i64 253)
+  call void @llvm.aarch64.sme.zero(i64 254)
+  call void @llvm.aarch64.sme.zero(i64 255)
+  ret void
+}
+
+declare void @llvm.aarch64.sme.zero(i64)


        


More information about the llvm-commits mailing list