[llvm] 509351d - [SVE] Add lowering for scalable vector fadd, fdiv, fmul and fsub operations.
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 16 04:34:34 PDT 2020
Author: Paul Walker
Date: 2020-07-16T11:31:35Z
New Revision: 509351d7689c518f1c2ae8975e704a5324c39ff8
URL: https://github.com/llvm/llvm-project/commit/509351d7689c518f1c2ae8975e704a5324c39ff8
DIFF: https://github.com/llvm/llvm-project/commit/509351d7689c518f1c2ae8975e704a5324c39ff8.diff
LOG: [SVE] Add lowering for scalable vector fadd, fdiv, fmul and fsub operations.
Lower these operations to predicated variants. This is prep work
required for fixed-length code generation, but it also fixes a bug
whereby these operations fail selection when "unpacked" vector
types (e.g. MVT::nxv2f32) are used.
This patch also adds the missing "unpacked" patterns for FMA.
Differential Revision: https://reviews.llvm.org/D83765
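To illustrate the bug fix with a case drawn from the updated tests below:
an fadd on the unpacked type <vscale x 2 x float> previously failed
instruction selection, and with this patch lowers to the predicated
(merging) form under an all-active predicate:

define <vscale x 2 x float> @fadd_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
  %res = fadd <vscale x 2 x float> %a, %b
  ret <vscale x 2 x float> %res
}
; Expected codegen: each float sits unpacked in the low half of a 64-bit
; container lane, so the all-active predicate is built at .d granularity
; while the fadd itself operates on .s elements:
;   ptrue p0.d
;   fadd  z0.s, p0/m, z0.s, z1.s
;   ret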
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/sve-fp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 85db14ab66fe..dae347cd8c2b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -948,7 +948,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::FADD, VT, Custom);
+ setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);
+ setOperationAction(ISD::FMUL, VT, Custom);
+ setOperationAction(ISD::FSUB, VT, Custom);
}
}
@@ -1483,11 +1487,14 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::FADD_PRED)
MAKE_CASE(AArch64ISD::FADDA_PRED)
MAKE_CASE(AArch64ISD::FADDV_PRED)
+ MAKE_CASE(AArch64ISD::FDIV_PRED)
MAKE_CASE(AArch64ISD::FMA_PRED)
MAKE_CASE(AArch64ISD::FMAXV_PRED)
MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
MAKE_CASE(AArch64ISD::FMINV_PRED)
MAKE_CASE(AArch64ISD::FMINNMV_PRED)
+ MAKE_CASE(AArch64ISD::FMUL_PRED)
+ MAKE_CASE(AArch64ISD::FSUB_PRED)
MAKE_CASE(AArch64ISD::NOT)
MAKE_CASE(AArch64ISD::BIT)
MAKE_CASE(AArch64ISD::CBZ)
@@ -3468,16 +3475,23 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::UMULO:
return LowerXALUO(Op, DAG);
case ISD::FADD:
- if (useSVEForFixedLengthVectorVT(Op.getValueType()))
+ if (Op.getValueType().isScalableVector() ||
+ useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
case ISD::FSUB:
+ if (Op.getValueType().isScalableVector())
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
case ISD::FMUL:
+ if (Op.getValueType().isScalableVector())
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
case ISD::FMA:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
case ISD::FDIV:
+ if (Op.getValueType().isScalableVector())
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:
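For context, LowerToPredicatedOp builds an all-active predicate for the
operation's value type and rewrites the node with the predicated opcode.
A simplified sketch of the scalable-vector path (the upstream helper also
handles the fixed-length case, and exact details may differ):

// Simplified sketch of the scalable-vector path of LowerToPredicatedOp.
SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
                                                   SelectionDAG &DAG,
                                                   unsigned NewOp) const {
  EVT VT = Op.getValueType();
  assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");
  SDLoc DL(Op);

  // All-active predicate with the same lane count as the result type.
  // Pattern 31 is AArch64SVEPredPattern::all, which is what the
  // SVE_2_Op_Pred_All_Active pattern added below matches against.
  SDValue Pg = getPTrue(DAG, DL, VT.changeVectorElementType(MVT::i1), 31);

  // Prepend the governing predicate to the original operands and rebuild
  // the node with the predicated opcode, e.g. ISD::FDIV -> FDIV_PRED.
  SmallVector<SDValue, 4> Operands = {Pg};
  for (const SDValue &V : Op->op_values())
    Operands.push_back(V);

  return DAG.getNode(NewOp, DL, VT, Operands);
}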
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 4fe77481706b..982dbc86d169 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -75,9 +75,12 @@ enum NodeType : unsigned {
// Arithmetic instructions
ADD_PRED,
FADD_PRED,
+ FDIV_PRED,
+ FMA_PRED,
+ FMUL_PRED,
+ FSUB_PRED,
SDIV_PRED,
UDIV_PRED,
- FMA_PRED,
SMIN_MERGE_OP1,
UMIN_MERGE_OP1,
SMAX_MERGE_OP1,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 5b1990e49262..1d7b774f2ee4 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -175,7 +175,10 @@ def SDT_AArch64FMA : SDTypeProfile<1, 4, [
// Predicated operations with the result of inactive lanes being unspecified.
def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>;
def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>;
+def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>;
def AArch64fma_p : SDNode<"AArch64ISD::FMA_PRED", SDT_AArch64FMA>;
+def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>;
+def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>;
def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
@@ -361,6 +364,9 @@ let Predicates = [HasSVE] in {
defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", "FDIV_ZPZZ", int_aarch64_sve_fdiv, DestructiveBinaryCommWithRev, "FDIVR_ZPmZ">;
defm FADD_ZPZZ : sve_fp_bin_pred_hfd<AArch64fadd_p>;
+ defm FSUB_ZPZZ : sve_fp_bin_pred_hfd<AArch64fsub_p>;
+ defm FMUL_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmul_p>;
+ defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>;
let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>;
@@ -377,10 +383,10 @@ let Predicates = [HasSVE] in {
defm FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdiv>;
}
- defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>;
- defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub>;
- defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul>;
- defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>;
+ defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd, AArch64fadd_p>;
+ defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub, AArch64fsub_p>;
+ defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul, AArch64fmul_p>;
+ defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>;
defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", int_aarch64_sve_frecps_x>;
defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", int_aarch64_sve_frsqrts_x>;
@@ -404,8 +410,14 @@ let Predicates = [HasSVE] in {
// regalloc.
def : Pat<(nxv8f16 (AArch64fma_p nxv8i1:$P, nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3)),
(FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>;
+ def : Pat<(nxv4f16 (AArch64fma_p nxv4i1:$P, nxv4f16:$Op1, nxv4f16:$Op2, nxv4f16:$Op3)),
+ (FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>;
+ def : Pat<(nxv2f16 (AArch64fma_p nxv2i1:$P, nxv2f16:$Op1, nxv2f16:$Op2, nxv2f16:$Op3)),
+ (FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>;
def : Pat<(nxv4f32 (AArch64fma_p nxv4i1:$P, nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3)),
(FMLA_ZPmZZ_S $P, $Op3, $Op1, $Op2)>;
+ def : Pat<(nxv2f32 (AArch64fma_p nxv2i1:$P, nxv2f32:$Op1, nxv2f32:$Op2, nxv2f32:$Op3)),
+ (FMLA_ZPmZZ_S $P, $Op3, $Op1, $Op2)>;
def : Pat<(nxv2f64 (AArch64fma_p nxv2i1:$P, nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3)),
(FMLA_ZPmZZ_D $P, $Op3, $Op1, $Op2)>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index a005d1e65abe..ee36ac016800 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -340,6 +340,12 @@ class SVE_2_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
(inst $Op1, $Op2)>;
+class SVE_2_Op_Pred_All_Active<ValueType vtd, SDPatternOperator op,
+ ValueType pt, ValueType vt1, ValueType vt2,
+ Instruction inst>
+: Pat<(vtd (op (pt (AArch64ptrue 31)), vt1:$Op1, vt2:$Op2)),
+ (inst $Op1, $Op2)>;
+
class SVE_2_Op_Pat_Reduce_To_Neon<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, Instruction inst, SubRegIndex sub>
: Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
@@ -1665,7 +1671,8 @@ class sve_fp_3op_u_zd<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zd;
}
-multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op> {
+multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op,
+ SDPatternOperator predicated_op = null_frag> {
def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>;
def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>;
def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>;
@@ -1674,6 +1681,9 @@ multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op> {
def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Pred_All_Active<nxv8f16, predicated_op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pred_All_Active<nxv4f32, predicated_op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pred_All_Active<nxv2f64, predicated_op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_fp_3op_u_zd_ftsmul<bits<3> opc, string asm, SDPatternOperator op> {
@@ -7804,7 +7814,10 @@ multiclass sve_fp_bin_pred_hfd<SDPatternOperator op> {
def _UNDEF_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+ def : SVE_3_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, nxv4f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+ def : SVE_3_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, nxv2f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+ def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Pseudo>(NAME # _UNDEF_D)>;
}
diff --git a/llvm/test/CodeGen/AArch64/sve-fp.ll b/llvm/test/CodeGen/AArch64/sve-fp.ll
index 6a882216bcc4..891a5c144234 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp.ll
@@ -5,8 +5,8 @@
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
-define <vscale x 8 x half> @fadd_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
-; CHECK-LABEL: fadd_h:
+define <vscale x 8 x half> @fadd_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fadd_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fadd z0.h, z0.h, z1.h
; CHECK-NEXT: ret
@@ -14,8 +14,28 @@ define <vscale x 8 x half> @fadd_h(<vscale x 8 x half> %a, <vscale x 8 x half> %
ret <vscale x 8 x half> %res
}
-define <vscale x 4 x float> @fadd_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
-; CHECK-LABEL: fadd_s:
+define <vscale x 4 x half> @fadd_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
+; CHECK-LABEL: fadd_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = fadd <vscale x 4 x half> %a, %b
+ ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @fadd_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
+; CHECK-LABEL: fadd_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = fadd <vscale x 2 x half> %a, %b
+ ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x float> @fadd_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fadd_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fadd z0.s, z0.s, z1.s
; CHECK-NEXT: ret
@@ -23,8 +43,18 @@ define <vscale x 4 x float> @fadd_s(<vscale x 4 x float> %a, <vscale x 4 x float
ret <vscale x 4 x float> %res
}
-define <vscale x 2 x double> @fadd_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
-; CHECK-LABEL: fadd_d:
+define <vscale x 2 x float> @fadd_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
+; CHECK-LABEL: fadd_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %res = fadd <vscale x 2 x float> %a, %b
+ ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x double> @fadd_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fadd_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fadd z0.d, z0.d, z1.d
; CHECK-NEXT: ret
@@ -32,8 +62,68 @@ define <vscale x 2 x double> @fadd_d(<vscale x 2 x double> %a, <vscale x 2 x dou
ret <vscale x 2 x double> %res
}
-define <vscale x 8 x half> @fsub_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
-; CHECK-LABEL: fsub_h:
+define <vscale x 8 x half> @fdiv_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fdiv_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = fdiv <vscale x 8 x half> %a, %b
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x half> @fdiv_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
+; CHECK-LABEL: fdiv_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = fdiv <vscale x 4 x half> %a, %b
+ ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @fdiv_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
+; CHECK-LABEL: fdiv_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = fdiv <vscale x 2 x half> %a, %b
+ ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x float> @fdiv_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fdiv_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %res = fdiv <vscale x 4 x float> %a, %b
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x float> @fdiv_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
+; CHECK-LABEL: fdiv_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %res = fdiv <vscale x 2 x float> %a, %b
+ ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x double> @fdiv_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fdiv_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %res = fdiv <vscale x 2 x double> %a, %b
+ ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x half> @fsub_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fsub_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fsub z0.h, z0.h, z1.h
; CHECK-NEXT: ret
@@ -41,8 +131,28 @@ define <vscale x 8 x half> @fsub_h(<vscale x 8 x half> %a, <vscale x 8 x half> %
ret <vscale x 8 x half> %res
}
-define <vscale x 4 x float> @fsub_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
-; CHECK-LABEL: fsub_s:
+define <vscale x 4 x half> @fsub_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
+; CHECK-LABEL: fsub_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = fsub <vscale x 4 x half> %a, %b
+ ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @fsub_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
+; CHECK-LABEL: fsub_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = fsub <vscale x 2 x half> %a, %b
+ ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x float> @fsub_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fsub_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fsub z0.s, z0.s, z1.s
; CHECK-NEXT: ret
@@ -50,8 +160,18 @@ define <vscale x 4 x float> @fsub_s(<vscale x 4 x float> %a, <vscale x 4 x float
ret <vscale x 4 x float> %res
}
-define <vscale x 2 x double> @fsub_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
-; CHECK-LABEL: fsub_d:
+define <vscale x 2 x float> @fsub_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
+; CHECK-LABEL: fsub_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %res = fsub <vscale x 2 x float> %a, %b
+ ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x double> @fsub_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fsub_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fsub z0.d, z0.d, z1.d
; CHECK-NEXT: ret
@@ -59,8 +179,8 @@ define <vscale x 2 x double> @fsub_d(<vscale x 2 x double> %a, <vscale x 2 x dou
ret <vscale x 2 x double> %res
}
-define <vscale x 8 x half> @fmul_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
-; CHECK-LABEL: fmul_h:
+define <vscale x 8 x half> @fmul_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fmul_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fmul z0.h, z0.h, z1.h
; CHECK-NEXT: ret
@@ -68,8 +188,28 @@ define <vscale x 8 x half> @fmul_h(<vscale x 8 x half> %a, <vscale x 8 x half> %
ret <vscale x 8 x half> %res
}
-define <vscale x 4 x float> @fmul_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
-; CHECK-LABEL: fmul_s:
+define <vscale x 4 x half> @fmul_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
+; CHECK-LABEL: fmul_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = fmul <vscale x 4 x half> %a, %b
+ ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @fmul_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
+; CHECK-LABEL: fmul_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %res = fmul <vscale x 2 x half> %a, %b
+ ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x float> @fmul_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fmul_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmul z0.s, z0.s, z1.s
; CHECK-NEXT: ret
@@ -77,8 +217,18 @@ define <vscale x 4 x float> @fmul_s(<vscale x 4 x float> %a, <vscale x 4 x float
ret <vscale x 4 x float> %res
}
-define <vscale x 2 x double> @fmul_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
-; CHECK-LABEL: fmul_d:
+define <vscale x 2 x float> @fmul_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
+; CHECK-LABEL: fmul_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %res = fmul <vscale x 2 x float> %a, %b
+ ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x double> @fmul_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fmul_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fmul z0.d, z0.d, z1.d
; CHECK-NEXT: ret
@@ -86,8 +236,8 @@ define <vscale x 2 x double> @fmul_d(<vscale x 2 x double> %a, <vscale x 2 x dou
ret <vscale x 2 x double> %res
}
-define <vscale x 8 x half> @fma_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
-; CHECK-LABEL: fma_half:
+define <vscale x 8 x half> @fma_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: fma_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fmla z2.h, p0/m, z0.h, z1.h
@@ -96,8 +246,31 @@ define <vscale x 8 x half> @fma_half(<vscale x 8 x half> %a, <vscale x 8 x half>
%r = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
ret <vscale x 8 x half> %r
}
-define <vscale x 4 x float> @fma_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
-; CHECK-LABEL: fma_float:
+
+define <vscale x 4 x half> @fma_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x half> %c) {
+; CHECK-LABEL: fma_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fmla z2.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: ret
+ %r = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x half> %c)
+ ret <vscale x 4 x half> %r
+}
+
+define <vscale x 2 x half> @fma_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x half> %c) {
+; CHECK-LABEL: fma_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fmla z2.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: ret
+ %r = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x half> %c)
+ ret <vscale x 2 x half> %r
+}
+
+define <vscale x 4 x float> @fma_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: fma_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fmla z2.s, p0/m, z0.s, z1.s
@@ -106,8 +279,20 @@ define <vscale x 4 x float> @fma_float(<vscale x 4 x float> %a, <vscale x 4 x fl
%r = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
ret <vscale x 4 x float> %r
}
-define <vscale x 2 x double> @fma_double_1(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
-; CHECK-LABEL: fma_double_1:
+
+define <vscale x 2 x float> @fma_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x float> %c) {
+; CHECK-LABEL: fma_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fmla z2.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: ret
+ %r = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x float> %c)
+ ret <vscale x 2 x float> %r
+}
+
+define <vscale x 2 x double> @fma_nxv2f64_1(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: fma_nxv2f64_1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fmla z2.d, p0/m, z0.d, z1.d
@@ -116,8 +301,9 @@ define <vscale x 2 x double> @fma_double_1(<vscale x 2 x double> %a, <vscale x 2
%r = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
ret <vscale x 2 x double> %r
}
-define <vscale x 2 x double> @fma_double_2(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
-; CHECK-LABEL: fma_double_2:
+
+define <vscale x 2 x double> @fma_nxv2f64_2(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: fma_nxv2f64_2:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fmla z2.d, p0/m, z1.d, z0.d
@@ -126,8 +312,9 @@ define <vscale x 2 x double> @fma_double_2(<vscale x 2 x double> %a, <vscale x 2
%r = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %b, <vscale x 2 x double> %a, <vscale x 2 x double> %c)
ret <vscale x 2 x double> %r
}
-define <vscale x 2 x double> @fma_double_3(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
-; CHECK-LABEL: fma_double_3:
+
+define <vscale x 2 x double> @fma_nxv2f64_3(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: fma_nxv2f64_3:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fmla z0.d, p0/m, z2.d, z1.d
@@ -231,7 +418,10 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x
declare <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>)
+declare <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>)
; Function Attrs: nounwind readnone
declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>) #2