[llvm] 77b13a5 - [AArch64][SME] Add SME addha/va intrinsics
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 5 01:47:55 PDT 2022
Author: David Sherwood
Date: 2022-07-05T09:47:17+01:00
New Revision: 77b13a57a9307e69e9dcf4dd0269aff704807267
URL: https://github.com/llvm/llvm-project/commit/77b13a57a9307e69e9dcf4dd0269aff704807267
DIFF: https://github.com/llvm/llvm-project/commit/77b13a57a9307e69e9dcf4dd0269aff704807267.diff
LOG: [AArch64][SME] Add SME addha/va intrinsics
This patch adds the following new SME intrinsics:
@llvm.aarch64.sme.addva
@llvm.aarch64.sme.addha
Both add a source vector to slices of a ZA tile under the control of two governing predicates: addva to each vertical slice and addha to each horizontal slice.
Differential Revision: https://reviews.llvm.org/D127861
Added:
llvm/test/CodeGen/AArch64/sme-intrinsics-add.ll
Modified:
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/SMEInstrFormats.td
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 1256ab2c9f847..fcc4680be9377 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2663,6 +2663,16 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sme_usmopa_wide : SME_OuterProduct_Intrinsic;
def int_aarch64_sme_usmops_wide : SME_OuterProduct_Intrinsic;
+ class SME_AddVectorToTile_Intrinsic
+ : DefaultAttrsIntrinsic<[],
+ [llvm_i64_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyvector_ty]>;
+
+ def int_aarch64_sme_addha : SME_AddVectorToTile_Intrinsic;
+ def int_aarch64_sme_addva : SME_AddVectorToTile_Intrinsic;
+
//
// Counting elements
//
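The SME_AddVectorToTile_Intrinsic class above declares a void intrinsic whose single overloaded type is the vector operand: the leading i64 selects the ZA tile, and the two predicate operands take i1 elements at the same element count as the vector. As a hedged sketch of how a frontend might emit a call to one of these intrinsics through IRBuilder (the helper below is illustrative and not part of the patch):

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/IntrinsicsAArch64.h"
  #include "llvm/IR/Module.h"

  using namespace llvm;

  // Illustrative helper, not part of this patch: emit a call to
  // @llvm.aarch64.sme.addha.<vty>. Passing the vector operand's type as
  // the sole overload is enough; the predicate types are derived from it
  // by the intrinsic definition above.
  static void emitAddHA(IRBuilder<> &B, Module &M, Value *TileIdx,
                        Value *Pn, Value *Pm, Value *Zn) {
    Function *Callee = Intrinsic::getDeclaration(
        &M, Intrinsic::aarch64_sme_addha, {Zn->getType()});
    B.CreateCall(Callee, {TileIdx, Pn, Pm, Zn});
  }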
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 58a78c2e3c245..76e0d858daa48 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2436,6 +2436,23 @@ AArch64TargetLowering::EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const {
return BB;
}
+MachineBasicBlock *
+AArch64TargetLowering::EmitAddVectorToTile(unsigned Opc, unsigned BaseReg,
+ MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
+
+ MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
+ MIB.addReg(BaseReg + MI.getOperand(0).getImm());
+ MIB.add(MI.getOperand(1)); // pn
+ MIB.add(MI.getOperand(2)); // pm
+ MIB.add(MI.getOperand(3)); // zn
+
+ MI.eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
switch (MI.getOpcode()) {
@@ -2568,6 +2585,14 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
BB);
case AArch64::ZERO_M_PSEUDO:
return EmitZero(MI, BB);
+ case AArch64::ADDHA_MPPZ_PSEUDO_S:
+ return EmitAddVectorToTile(AArch64::ADDHA_MPPZ_S, AArch64::ZAS0, MI, BB);
+ case AArch64::ADDVA_MPPZ_PSEUDO_S:
+ return EmitAddVectorToTile(AArch64::ADDVA_MPPZ_S, AArch64::ZAS0, MI, BB);
+ case AArch64::ADDHA_MPPZ_PSEUDO_D:
+ return EmitAddVectorToTile(AArch64::ADDHA_MPPZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::ADDVA_MPPZ_PSEUDO_D:
+ return EmitAddVectorToTile(AArch64::ADDVA_MPPZ_D, AArch64::ZAD0, MI, BB);
}
}
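A note on the register arithmetic in EmitAddVectorToTile above: the generated AArch64 register enum numbers the ZA tile registers contiguously for each element size (ZAS0..ZAS3 for the 32-bit tiles, ZAD0..ZAD7 for the 64-bit tiles), so adding the pseudo's tile immediate to the base register yields the concrete tile register. The tile register is added twice, once with RegState::Define and once as a plain use, because addha/addva accumulate into the tile (read-modify-write). A minimal sketch of the mapping, assuming only the contiguous enum layout:

  #include "llvm/CodeGen/MachineInstr.h"

  // Illustrative helper, not part of this patch: map the pseudo's tile
  // immediate (operand 0) onto a concrete ZA tile register, e.g.
  // AArch64::ZAS0 + 3 == AArch64::ZAS3, AArch64::ZAD0 + 7 == AArch64::ZAD7.
  static unsigned selectTileReg(unsigned BaseReg,
                                const llvm::MachineInstr &MI) {
    return BaseReg + MI.getOperand(0).getImm();
  }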
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index c7a6acc394d76..48a559b4352ac 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -571,6 +571,9 @@ class AArch64TargetLowering : public TargetLowering {
MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitAddVectorToTile(unsigned Opc, unsigned BaseReg,
+ MachineInstr &MI,
+ MachineBasicBlock *BB) const;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 2744e81f99f10..cb36aa26e8398 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -227,6 +227,40 @@ class sme_add_vector_to_tile_u64<bit V, string mnemonic>
let Inst{2-0} = ZAda;
}
+class sme_add_vector_to_tile_pseudo<ZPRRegOp zpr_ty>
+ : Pseudo<(outs),
+ (ins i64imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), []>,
+ Sched<[]> {
+ // Translated to the actual instructions in AArch64ISelLowering.cpp
+ let usesCustomInserter = 1;
+}
+
+def ADDHA_MPPZ_PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32>;
+def ADDVA_MPPZ_PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32>;
+
+def : Pat<(int_aarch64_sme_addha
+ imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
+ (nxv4i32 ZPR32:$zn)),
+ (ADDHA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>;
+def : Pat<(int_aarch64_sme_addva
+ imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
+ (nxv4i32 ZPR32:$zn)),
+ (ADDVA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>;
+
+let Predicates = [HasSMEI64] in {
+def ADDHA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64>;
+def ADDVA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64>;
+
+def : Pat<(int_aarch64_sme_addha
+ imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
+ (nxv2i64 ZPR64:$zn)),
+ (ADDHA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>;
+def : Pat<(int_aarch64_sme_addva
+ imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
+ (nxv2i64 ZPR64:$zn)),
+ (ADDVA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>;
+}
+
//===----------------------------------------------------------------------===//
// SME Contiguous Loads
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-add.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-add.ll
new file mode 100644
index 0000000000000..d979484a7c5a0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-add.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme-i64 -verify-machineinstrs < %s | FileCheck %s
+
+define void @addha_s(<vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn) {
+; CHECK-LABEL: addha_s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: addha za0.s, p0/m, p1/m, z0.s
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.addha.nxv4i32(i64 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn)
+ ret void
+}
+
+define void @addva_s(<vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn) {
+; CHECK-LABEL: addva_s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: addva za3.s, p0/m, p1/m, z0.s
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.addva.nxv4i32(i64 3, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn)
+ ret void
+}
+
+define void @addha_d(<vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn) {
+; CHECK-LABEL: addha_d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: addha za0.d, p0/m, p1/m, z0.d
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.addha.nxv2i64(i64 0, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn)
+ ret void
+}
+
+define void @addva_d(<vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn) {
+; CHECK-LABEL: addva_d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: addva za7.d, p0/m, p1/m, z0.d
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sme.addva.nxv2i64(i64 7, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn)
+ ret void
+}
+
+declare void @llvm.aarch64.sme.addha.nxv4i32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.addha.nxv2i64(i64, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.addva.nxv4i32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.addva.nxv2i64(i64, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i64>)