[llvm] 921e89c - [SVE] Only combine (fneg (fma)) => FNMLA with nsz
Peter Waller via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 13 03:34:45 PST 2021
Author: Peter Waller
Date: 2021-12-13T11:33:07Z
New Revision: 921e89c59a71ca3487e175164afba15e76ae2e09
URL: https://github.com/llvm/llvm-project/commit/921e89c59a71ca3487e175164afba15e76ae2e09
DIFF: https://github.com/llvm/llvm-project/commit/921e89c59a71ca3487e175164afba15e76ae2e09.diff
LOG: [SVE] Only combine (fneg (fma)) => FNMLA with nsz
-(Za + Zm * Zn) != (-Za + Zm * (-Zn))
when the FMA produces a zero output (e.g. all zero inputs can produce -0
output)
Add a PatFrag to check for the presence of nsz on the fneg, and add tests which
ensure the combine does not fire in the absence of nsz.
See https://reviews.llvm.org/D90901 for a similar discussion on X86.
Differential Revision: https://reviews.llvm.org/D109525
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/test/CodeGen/AArch64/sve-fp-combine.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d0e2b7ee882d..332397454835 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3111,6 +3111,8 @@ void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*FPOp);
SDValue Op = getValue(I.getOperand(0));
SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(),
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 32d1b6adc286..792e268137b1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19117,7 +19117,7 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
if (isMergePassthruOpcode(NewOp))
Operands.push_back(DAG.getUNDEF(VT));
- return DAG.getNode(NewOp, DL, VT, Operands);
+ return DAG.getNode(NewOp, DL, VT, Operands, Op->getFlags());
}
// If a fixed length vector operation has no side effects when applied to
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 52cfeb5051cb..53f0b3b0a97a 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -275,6 +275,11 @@ def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
return N->hasOneUse();
}]>;
+def AArch64fneg_mt_nsz : PatFrag<(ops node:$pred, node:$op, node:$pt),
+ (AArch64fneg_mt node:$pred, node:$op, node:$pt), [{
+ return N->getFlags().hasNoSignedZeros();
+}]>;
+
def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>,
SDTCisSameAs<0,1>, SDTCisSameAs<1,2>
@@ -536,7 +541,8 @@ let Predicates = [HasSVEorStreamingSVE] in {
(!cast<Instruction>("FNMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
// Zd = -(Za + Zn * Zm)
- def : Pat<(AArch64fneg_mt PredTy:$P, (AArch64fma_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za), (Ty (undef))),
+ // (with nsz neg.)
+ def : Pat<(AArch64fneg_mt_nsz PredTy:$P, (AArch64fma_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za), (Ty (undef))),
(!cast<Instruction>("FNMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
// Zda = Zda + Zn * Zm
diff --git a/llvm/test/CodeGen/AArch64/sve-fp-combine.ll b/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
index d9bf9653097c..9daa936e52c0 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
@@ -549,7 +549,7 @@ define <vscale x 8 x half> @fnmla_h_reversed(<vscale x 8 x half> %acc, <vscale x
; CHECK-NEXT: ret
%mul = fmul contract <vscale x 8 x half> %m1, %m2
%add = fadd contract <vscale x 8 x half> %mul, %acc
- %res = fneg contract <vscale x 8 x half> %add
+ %res = fneg contract nsz <vscale x 8 x half> %add
ret <vscale x 8 x half> %res
}
@@ -561,7 +561,7 @@ define <vscale x 4 x half> @fnmla_hx4_reversed(<vscale x 4 x half> %acc, <vscale
; CHECK-NEXT: ret
%mul = fmul contract <vscale x 4 x half> %m1, %m2
%add = fadd contract <vscale x 4 x half> %mul, %acc
- %res = fneg contract <vscale x 4 x half> %add
+ %res = fneg contract nsz <vscale x 4 x half> %add
ret <vscale x 4 x half> %res
}
@@ -573,7 +573,7 @@ define <vscale x 2 x half> @fnmla_hx2_reversed(<vscale x 2 x half> %acc, <vscale
; CHECK-NEXT: ret
%mul = fmul contract <vscale x 2 x half> %m1, %m2
%add = fadd contract <vscale x 2 x half> %mul, %acc
- %res = fneg contract <vscale x 2 x half> %add
+ %res = fneg contract nsz <vscale x 2 x half> %add
ret <vscale x 2 x half> %res
}
@@ -585,7 +585,7 @@ define <vscale x 4 x float> @fnmla_s_reversed(<vscale x 4 x float> %acc, <vscale
; CHECK-NEXT: ret
%mul = fmul contract <vscale x 4 x float> %m1, %m2
%add = fadd contract <vscale x 4 x float> %mul, %acc
- %res = fneg contract <vscale x 4 x float> %add
+ %res = fneg contract nsz <vscale x 4 x float> %add
ret <vscale x 4 x float> %res
}
@@ -597,7 +597,7 @@ define <vscale x 2 x float> @fnmla_sx2_reversed(<vscale x 2 x float> %acc, <vsca
; CHECK-NEXT: ret
%mul = fmul contract <vscale x 2 x float> %m1, %m2
%add = fadd contract <vscale x 2 x float> %mul, %acc
- %res = fneg contract <vscale x 2 x float> %add
+ %res = fneg contract nsz <vscale x 2 x float> %add
ret <vscale x 2 x float> %res
}
@@ -606,6 +606,84 @@ define <vscale x 2 x double> @fnmla_d_reversed(<vscale x 2 x double> %acc, <vsca
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fnmla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: ret
+ %mul = fmul contract <vscale x 2 x double> %m1, %m2
+ %add = fadd contract <vscale x 2 x double> %mul, %acc
+ %res = fneg contract nsz <vscale x 2 x double> %add
+ ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x half> @signed_zeros_negtest_fnmla_h_reversed(<vscale x 8 x half> %acc, <vscale x 8 x half> %m1, <vscale x 8 x half> %m2) {
+; CHECK-LABEL: signed_zeros_negtest_fnmla_h_reversed:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: fneg z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %mul = fmul contract <vscale x 8 x half> %m1, %m2
+ %add = fadd contract <vscale x 8 x half> %mul, %acc
+ %res = fneg contract <vscale x 8 x half> %add
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x half> @signed_zeros_negtest_fnmla_hx4_reversed(<vscale x 4 x half> %acc, <vscale x 4 x half> %m1, <vscale x 4 x half> %m2) {
+; CHECK-LABEL: signed_zeros_negtest_fnmla_hx4_reversed:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: fneg z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %mul = fmul contract <vscale x 4 x half> %m1, %m2
+ %add = fadd contract <vscale x 4 x half> %mul, %acc
+ %res = fneg contract <vscale x 4 x half> %add
+ ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @signed_zeros_negtest_fnmla_hx2_reversed(<vscale x 2 x half> %acc, <vscale x 2 x half> %m1, <vscale x 2 x half> %m2) {
+; CHECK-LABEL: signed_zeros_negtest_fnmla_hx2_reversed:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: fneg z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %mul = fmul contract <vscale x 2 x half> %m1, %m2
+ %add = fadd contract <vscale x 2 x half> %mul, %acc
+ %res = fneg contract <vscale x 2 x half> %add
+ ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x float> @signed_zeros_negtest_fnmla_s_reversed(<vscale x 4 x float> %acc, <vscale x 4 x float> %m1, <vscale x 4 x float> %m2) {
+; CHECK-LABEL: signed_zeros_negtest_fnmla_s_reversed:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fmla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: fneg z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %mul = fmul contract <vscale x 4 x float> %m1, %m2
+ %add = fadd contract <vscale x 4 x float> %mul, %acc
+ %res = fneg contract <vscale x 4 x float> %add
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x float> @signed_zeros_negtest_fnmla_sx2_reversed(<vscale x 2 x float> %acc, <vscale x 2 x float> %m1, <vscale x 2 x float> %m2) {
+; CHECK-LABEL: signed_zeros_negtest_fnmla_sx2_reversed:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fmla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: fneg z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %mul = fmul contract <vscale x 2 x float> %m1, %m2
+ %add = fadd contract <vscale x 2 x float> %mul, %acc
+ %res = fneg contract <vscale x 2 x float> %add
+ ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x double> @signed_zeros_negtest_fnmla_d_reversed(<vscale x 2 x double> %acc, <vscale x 2 x double> %m1, <vscale x 2 x double> %m2) {
+; CHECK-LABEL: signed_zeros_negtest_fnmla_d_reversed:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fmla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: fneg z0.d, p0/m, z0.d
; CHECK-NEXT: ret
%mul = fmul contract <vscale x 2 x double> %m1, %m2
%add = fadd contract <vscale x 2 x double> %mul, %acc
More information about the llvm-commits
mailing list