[llvm] 921e89c - [SVE] Only combine (fneg (fma)) => FNMLA with nsz

Peter Waller via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 13 03:34:45 PST 2021


Author: Peter Waller
Date: 2021-12-13T11:33:07Z
New Revision: 921e89c59a71ca3487e175164afba15e76ae2e09

URL: https://github.com/llvm/llvm-project/commit/921e89c59a71ca3487e175164afba15e76ae2e09
DIFF: https://github.com/llvm/llvm-project/commit/921e89c59a71ca3487e175164afba15e76ae2e09.diff

LOG: [SVE] Only combine (fneg (fma)) => FNMLA with nsz

-(Za + Zm * Zn) != (-Za + Zm * (-Zn))
when the FMA produces a zero output (e.g. all zero inputs can produce -0
output)

Add a PatFrag to check presence of nsz on the fneg, add tests which
ensure the combine does not fire in the absence of nsz.

See https://reviews.llvm.org/D90901 for a similar discussion on X86.

Differential Revision: https://reviews.llvm.org/D109525

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/test/CodeGen/AArch64/sve-fp-combine.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d0e2b7ee882d..332397454835 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3111,6 +3111,8 @@ void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
 
 void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
   SDNodeFlags Flags;
+  if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+    Flags.copyFMF(*FPOp);
 
   SDValue Op = getValue(I.getOperand(0));
   SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(),

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 32d1b6adc286..792e268137b1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19117,7 +19117,7 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
   if (isMergePassthruOpcode(NewOp))
     Operands.push_back(DAG.getUNDEF(VT));
 
-  return DAG.getNode(NewOp, DL, VT, Operands);
+  return DAG.getNode(NewOp, DL, VT, Operands, Op->getFlags());
 }
 
 // If a fixed length vector operation has no side effects when applied to

diff  --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 52cfeb5051cb..53f0b3b0a97a 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -275,6 +275,11 @@ def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
   return N->hasOneUse();
 }]>;
 
+def AArch64fneg_mt_nsz : PatFrag<(ops node:$pred, node:$op, node:$pt),
+                                  (AArch64fneg_mt node:$pred, node:$op, node:$pt), [{
+  return N->getFlags().hasNoSignedZeros();
+}]>;
+
 def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [
   SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>,
   SDTCisSameAs<0,1>, SDTCisSameAs<1,2>
@@ -536,7 +541,8 @@ let Predicates = [HasSVEorStreamingSVE] in {
               (!cast<Instruction>("FNMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
 
     // Zd = -(Za + Zn * Zm)
-    def : Pat<(AArch64fneg_mt PredTy:$P, (AArch64fma_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za), (Ty (undef))),
+    // (with nsz neg.)
+    def : Pat<(AArch64fneg_mt_nsz PredTy:$P, (AArch64fma_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za), (Ty (undef))),
               (!cast<Instruction>("FNMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
 
     // Zda = Zda + Zn * Zm

diff  --git a/llvm/test/CodeGen/AArch64/sve-fp-combine.ll b/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
index d9bf9653097c..9daa936e52c0 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
@@ -549,7 +549,7 @@ define <vscale x 8 x half> @fnmla_h_reversed(<vscale x 8 x half> %acc, <vscale x
 ; CHECK-NEXT:    ret
   %mul = fmul contract <vscale x 8 x half> %m1, %m2
   %add = fadd contract <vscale x 8 x half> %mul, %acc
-  %res = fneg contract <vscale x 8 x half> %add
+  %res = fneg contract nsz <vscale x 8 x half> %add
   ret <vscale x 8 x half> %res
 }
 
@@ -561,7 +561,7 @@ define <vscale x 4 x half> @fnmla_hx4_reversed(<vscale x 4 x half> %acc, <vscale
 ; CHECK-NEXT:    ret
   %mul = fmul contract <vscale x 4 x half> %m1, %m2
   %add = fadd contract <vscale x 4 x half> %mul, %acc
-  %res = fneg contract <vscale x 4 x half> %add
+  %res = fneg contract nsz <vscale x 4 x half> %add
   ret <vscale x 4 x half> %res
 }
 
@@ -573,7 +573,7 @@ define <vscale x 2 x half> @fnmla_hx2_reversed(<vscale x 2 x half> %acc, <vscale
 ; CHECK-NEXT:    ret
   %mul = fmul contract <vscale x 2 x half> %m1, %m2
   %add = fadd contract <vscale x 2 x half> %mul, %acc
-  %res = fneg contract <vscale x 2 x half> %add
+  %res = fneg contract nsz <vscale x 2 x half> %add
   ret <vscale x 2 x half> %res
 }
 
@@ -585,7 +585,7 @@ define <vscale x 4 x float> @fnmla_s_reversed(<vscale x 4 x float> %acc, <vscale
 ; CHECK-NEXT:    ret
   %mul = fmul contract <vscale x 4 x float> %m1, %m2
   %add = fadd contract <vscale x 4 x float> %mul, %acc
-  %res = fneg contract <vscale x 4 x float> %add
+  %res = fneg contract nsz <vscale x 4 x float> %add
   ret <vscale x 4 x float> %res
 }
 
@@ -597,7 +597,7 @@ define <vscale x 2 x float> @fnmla_sx2_reversed(<vscale x 2 x float> %acc, <vsca
 ; CHECK-NEXT:    ret
   %mul = fmul contract <vscale x 2 x float> %m1, %m2
   %add = fadd contract <vscale x 2 x float> %mul, %acc
-  %res = fneg contract <vscale x 2 x float> %add
+  %res = fneg contract nsz <vscale x 2 x float> %add
   ret <vscale x 2 x float> %res
 }
 
@@ -606,6 +606,84 @@ define <vscale x 2 x double> @fnmla_d_reversed(<vscale x 2 x double> %acc, <vsca
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fnmla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %mul = fmul contract <vscale x 2 x double> %m1, %m2
+  %add = fadd contract <vscale x 2 x double> %mul, %acc
+  %res = fneg contract nsz <vscale x 2 x double> %add
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x half> @signed_zeros_negtest_fnmla_h_reversed(<vscale x 8 x half> %acc, <vscale x 8 x half> %m1, <vscale x 8 x half> %m2) {
+; CHECK-LABEL: signed_zeros_negtest_fnmla_h_reversed:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fmla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %mul = fmul contract <vscale x 8 x half> %m1, %m2
+  %add = fadd contract <vscale x 8 x half> %mul, %acc
+  %res = fneg contract <vscale x 8 x half> %add
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x half> @signed_zeros_negtest_fnmla_hx4_reversed(<vscale x 4 x half> %acc, <vscale x 4 x half> %m1, <vscale x 4 x half> %m2) {
+; CHECK-LABEL: signed_zeros_negtest_fnmla_hx4_reversed:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %mul = fmul contract <vscale x 4 x half> %m1, %m2
+  %add = fadd contract <vscale x 4 x half> %mul, %acc
+  %res = fneg contract <vscale x 4 x half> %add
+  ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @signed_zeros_negtest_fnmla_hx2_reversed(<vscale x 2 x half> %acc, <vscale x 2 x half> %m1, <vscale x 2 x half> %m2) {
+; CHECK-LABEL: signed_zeros_negtest_fnmla_hx2_reversed:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %mul = fmul contract <vscale x 2 x half> %m1, %m2
+  %add = fadd contract <vscale x 2 x half> %mul, %acc
+  %res = fneg contract <vscale x 2 x half> %add
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x float> @signed_zeros_negtest_fnmla_s_reversed(<vscale x 4 x float> %acc, <vscale x 4 x float> %m1, <vscale x 4 x float> %m2) {
+; CHECK-LABEL: signed_zeros_negtest_fnmla_s_reversed:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    fneg z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %mul = fmul contract <vscale x 4 x float> %m1, %m2
+  %add = fadd contract <vscale x 4 x float> %mul, %acc
+  %res = fneg contract <vscale x 4 x float> %add
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x float> @signed_zeros_negtest_fnmla_sx2_reversed(<vscale x 2 x float> %acc, <vscale x 2 x float> %m1, <vscale x 2 x float> %m2) {
+; CHECK-LABEL: signed_zeros_negtest_fnmla_sx2_reversed:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    fneg z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %mul = fmul contract <vscale x 2 x float> %m1, %m2
+  %add = fadd contract <vscale x 2 x float> %mul, %acc
+  %res = fneg contract <vscale x 2 x float> %add
+  ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x double> @signed_zeros_negtest_fnmla_d_reversed(<vscale x 2 x double> %acc, <vscale x 2 x double> %m1, <vscale x 2 x double> %m2) {
+; CHECK-LABEL: signed_zeros_negtest_fnmla_d_reversed:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    fneg z0.d, p0/m, z0.d
 ; CHECK-NEXT:    ret
   %mul = fmul contract <vscale x 2 x double> %m1, %m2
   %add = fadd contract <vscale x 2 x double> %mul, %acc


        


More information about the llvm-commits mailing list