[llvm] f3188b9 - [AArch64][SME2] Add multi-vector frint intrinsics
Kerry McLaughlin via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 7 03:34:27 PST 2023
Author: Kerry McLaughlin
Date: 2023-02-07T11:34:07Z
New Revision: f3188b98d05dc17417d6783265f38fe012a5b548
URL: https://github.com/llvm/llvm-project/commit/f3188b98d05dc17417d6783265f38fe012a5b548
DIFF: https://github.com/llvm/llvm-project/commit/f3188b98d05dc17417d6783265f38fe012a5b548.diff
LOG: [AArch64][SME2] Add multi-vector frint intrinsics
Adds x2 and x4 multi-vector intrinsics for the following SME2 instructions:
- frinta
- frintm
- frintn
- frintp
NOTE: These intrinsics are still in development and are subject to future changes.
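As a usage example, the x2 form of frinta is called from LLVM IR as in the
tests added by this patch: the intrinsic takes two scalable vectors and
returns a two-element struct holding the rounded results.

  declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)

  define { <vscale x 4 x float>, <vscale x 4 x float> } @example(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
    ; Rounds each lane of both inputs to the nearest integral value,
    ; ties away from zero (FRINTA semantics).
    %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
    ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
  }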
Reviewed By: david-arm
Differential Revision: https://reviews.llvm.org/D143058
Added:
llvm/test/CodeGen/AArch64/sme2-intrinsics-frint.ll
Modified:
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 6479b7d442e0..4501ecb66350 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3044,6 +3044,13 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sve_sqdmulh_vgx2 : SME2_VG2_Multi_Multi_Intrinsic;
def int_aarch64_sve_sqdmulh_vgx4 : SME2_VG4_Multi_Multi_Intrinsic;
+ // Multi-vector floating-point round to integral value
+
+ foreach inst = ["a", "m", "n", "p"] in {
+ def int_aarch64_sve_frint # inst # _x2 : SVE2_VG2_ZipUzp_Intrinsic;
+ def int_aarch64_sve_frint # inst # _x4 : SVE2_VG4_ZipUzp_Intrinsic;
+ }
+
//
// Multi-vector min/max
//
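The SVE2_VG2_ZipUzp_Intrinsic and SVE2_VG4_ZipUzp_Intrinsic classes reused
above are existing multi-vector unary intrinsic classes from this file: one
polymorphic scalable-vector type shared by all results and operands, with no
memory effects. A minimal sketch of the x2 class's expected shape, assumed
from similar definitions in IntrinsicsAArch64.td (see that file for the
authoritative definition):

  class SVE2_VG2_ZipUzp_Intrinsic
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],  // two results
                            [LLVMMatchType<0>, LLVMMatchType<0>],   // two operands
                            [IntrNoMem]>;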
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index a9ee34646bde..2eb3101345c0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -367,6 +367,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
bool IsTupleInput, unsigned Opc);
+ void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
template <unsigned MaxIdx, unsigned Scale>
void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
@@ -1874,6 +1875,13 @@ void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
CurDAG->RemoveDeadNode(N);
}
+void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
+ unsigned Opcode) {
+ if (N->getValueType(0) != MVT::nxv4f32)
+ return;
+ SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
+}
+
void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
unsigned Op) {
SDLoc DL(N);
@@ -5380,6 +5388,30 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
AArch64::UZP_VG4_4Z4Z_Q);
return;
+ case Intrinsic::aarch64_sve_frinta_x2:
+ SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frinta_x4:
+ SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frintm_x2:
+ SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frintm_x4:
+ SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frintn_x2:
+ SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frintn_x4:
+ SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frintp_x2:
+ SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frintp_x4:
+ SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
+ return;
}
break;
}
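Note the MVT::nxv4f32 guard in SelectFrintFromVT above: the SME2 multi-vector
FRINT instructions are only defined for .s (single-precision) elements, so any
other result type falls through unselected. For illustration, a hypothetical
instantiation at nxv2f64 (not part of this commit; the intrinsics are
polymorphic, so such a call is well-formed IR but has no matching instruction)
would be rejected by the guard:

  ; Hypothetical f64 instantiation: SelectFrintFromVT returns early here,
  ; because multi-vector FRINTA has no 64-bit element form.
  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.frinta.x2.nxv2f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)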
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-frint.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-frint.ll
new file mode 100644
index 000000000000..95c61f582ff6
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-frint.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
+
+; FRINTA
+
+define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frinta_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
+; CHECK-LABEL: multi_vec_frinta_x2_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z3.d, z2.d
+; CHECK-NEXT: mov z2.d, z1.d
+; CHECK-NEXT: frinta { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+ ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frinta_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
+; CHECK-LABEL: multi_vec_frinta_x4_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z6.d, z3.d
+; CHECK-NEXT: mov z5.d, z2.d
+; CHECK-NEXT: mov z4.d, z1.d
+; CHECK-NEXT: frinta { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x4.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
+ ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+; FRINTM
+
+define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintm_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
+; CHECK-LABEL: multi_vec_frintm_x2_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z3.d, z2.d
+; CHECK-NEXT: mov z2.d, z1.d
+; CHECK-NEXT: frintm { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintm.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+ ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintm_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
+; CHECK-LABEL: multi_vec_frintm_x4_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z6.d, z3.d
+; CHECK-NEXT: mov z5.d, z2.d
+; CHECK-NEXT: mov z4.d, z1.d
+; CHECK-NEXT: frintm { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintm.x4.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
+ ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+; FRINTN
+
+define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintn_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
+; CHECK-LABEL: multi_vec_frintn_x2_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z3.d, z2.d
+; CHECK-NEXT: mov z2.d, z1.d
+; CHECK-NEXT: frintn { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintn.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+ ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintn_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
+; CHECK-LABEL: multi_vec_frintn_x4_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z6.d, z3.d
+; CHECK-NEXT: mov z5.d, z2.d
+; CHECK-NEXT: mov z4.d, z1.d
+; CHECK-NEXT: frintn { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintn.x4.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
+ ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+; FRINTP
+
+define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintp_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
+; CHECK-LABEL: multi_vec_frintp_x2_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z3.d, z2.d
+; CHECK-NEXT: mov z2.d, z1.d
+; CHECK-NEXT: frintp { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+ ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintp_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
+; CHECK-LABEL: multi_vec_frintp_x4_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z6.d, z3.d
+; CHECK-NEXT: mov z5.d, z2.d
+; CHECK-NEXT: mov z4.d, z1.d
+; CHECK-NEXT: frintp { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x4.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
+ ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintm.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintm.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintn.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintn.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
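For context, these LLVM intrinsics are the compiler-side lowering targets for
the SME2 ACLE builtins. A sketch of the expected C-level surface, based on the
draft SME2 ACLE at the time; the header, intrinsic name, and streaming
attribute below are assumptions of this note, not something defined by this
commit:

  #include <arm_sme.h>

  // Assumed draft-ACLE intrinsic (hypothetical here): rounds each lane of
  // both vectors in the pair to the nearest integral value, ties away from
  // zero, while in streaming mode.
  svfloat32x2_t round_pair(svfloat32x2_t zn) __arm_streaming {
    return svrinta_f32_x2(zn);
  }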