[llvm] 70629d5 - [SVE] Update patterns to commute FMLS multiplication operands

Tue Mar 1 12:53:25 PST 2022

Author: Cameron McInally
Date: 2022-03-01T12:53:14-08:00
New Revision: 70629d570bb610a6e5ff3be8e3f08456b72f95e4

URL: https://github.com/llvm/llvm-project/commit/70629d570bb610a6e5ff3be8e3f08456b72f95e4
DIFF: https://github.com/llvm/llvm-project/commit/70629d570bb610a6e5ff3be8e3f08456b72f95e4.diff

LOG: [SVE] Update patterns to commute FMLS multiplication operands

Use PatFrags to match an AArch64ISD::FMA_PRED node whose negated multiplication
operand is in either position, so unpredicated FMAs select FMLS in both orders.

Reviewed by: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D120570

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/test/CodeGen/AArch64/sve-fp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 6f926256e021e..823d8c30625d5 100644

--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -292,6 +292,11 @@ def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
 def AArch64fabd_p : PatFrag<(ops node:$pg, node:$op1, node:$op2),
                             (AArch64fabs_mt node:$pg, (AArch64fsub_p node:$pg, node:$op1, node:$op2), undef)>;
 
+// op3 - op1 * op2: matches an FMA whose negated multiplicand (op1) is either multiplication operand, since the multiply commutes.
+def AArch64fmls_p : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
+                          [(AArch64fma_p node:$pred, (AArch64fneg_mt node:$pred, node:$op1, (undef)), node:$op2, node:$op3),
+                           (AArch64fma_p node:$pred, node:$op2, (AArch64fneg_mt node:$pred, node:$op1, (undef)), node:$op3)]>;
+
 def AArch64fneg_mt_nsz : PatFrag<(ops node:$pred, node:$op, node:$pt),
                                  (AArch64fneg_mt node:$pred, node:$op, node:$pt), [{
   return N->getFlags().hasNoSignedZeros();
@@ -552,7 +557,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
               (!cast<Instruction>("FMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
 
     // Zd = Za + -Zn * Zm
-    def : Pat<(Ty (AArch64fma_p PredTy:$P, (AArch64fneg_mt PredTy:$P, Ty:$Zn, (Ty (undef))), Ty:$Zm, Ty:$Za)),
+    def : Pat<(Ty (AArch64fmls_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za)),
               (!cast<Instruction>("FMLS_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
 
     // Zd = -Za + Zn * Zm

diff  --git a/llvm/test/CodeGen/AArch64/sve-fp.ll b/llvm/test/CodeGen/AArch64/sve-fp.ll
index 130e0bd842f2c..19bc2c5d02354 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp.ll
@@ -312,6 +312,72 @@ define <vscale x 2 x double> @fma_nxv2f64_3(<vscale x 2 x double> %a, <vscale x
   ret <vscale x 2 x double> %r
 }
 
+define <vscale x 8 x half> @fmls_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: fmls_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %neg = fneg <vscale x 8 x half> %b
+  %r = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> %c, <vscale x 8 x half> %neg, <vscale x 8 x half> %a)
+  ret <vscale x 8 x half> %r
+}
+
+define <vscale x 4 x half> @fmls_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x half> %c) {
+; CHECK-LABEL: fmls_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %neg = fneg <vscale x 4 x half> %b
+  %r = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> %c, <vscale x 4 x half> %neg, <vscale x 4 x half> %a)
+  ret <vscale x 4 x half> %r
+}
+
+define <vscale x 2 x half> @fmls_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x half> %c) {
+; CHECK-LABEL: fmls_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %neg = fneg <vscale x 2 x half> %b
+  %r = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> %c, <vscale x 2 x half> %neg, <vscale x 2 x half> %a)
+  ret <vscale x 2 x half> %r
+}
+
+define <vscale x 4 x float> @fmls_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: fmls_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmls z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %neg = fneg <vscale x 4 x float> %b
+  %r = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> %c, <vscale x 4 x float> %neg, <vscale x 4 x float> %a)
+  ret <vscale x 4 x float> %r
+}
+
+define <vscale x 2 x float> @fmls_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x float> %c) {
+; CHECK-LABEL: fmls_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmls z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %neg = fneg <vscale x 2 x float> %b
+  %r = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %c, <vscale x 2 x float> %neg, <vscale x 2 x float> %a)
+  ret <vscale x 2 x float> %r
+}
+
+define <vscale x 2 x double> @fmls_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: fmls_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmls z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %neg = fneg <vscale x 2 x double> %b
+  %r = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %c, <vscale x 2 x double> %neg, <vscale x 2 x double> %a)
+  ret <vscale x 2 x double> %r
+}
+
 define <vscale x 8 x half> @fneg_nxv8f16(<vscale x 8 x half> %a) {
 ; CHECK-LABEL: fneg_nxv8f16:
 ; CHECK:       // %bb.0: