[PATCH] D120570: [SVE] Add pattern to commute FMSB operands

Fri Feb 25 08:35:18 PST 2022

cameron.mcinally created this revision.
cameron.mcinally added reviewers: paulwalker-arm, sdesmalen, david-arm, bsmith, efriedma.
Herald added subscribers: psnobl, hiraditya, kristof.beyls, tschuett.
cameron.mcinally requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

`AArch64ISD::FMA_PRED` nodes with a negated multiplication operand currently match suboptimal hardware instructions when the negation is on the second multiplicand rather than the first. Because multiplication is commutative, we can commute the multiplication operands to generate better instruction sequences. This patch adds a pattern that commutes the operands for an unmasked FMSB instruction.

I'll try to send matching patches for the predicated FMSB and FNMLA patterns soon, assuming this one is accepted. That said, if someone is motivated to rework this multiclass in a cohesive way, please feel free.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D120570

Files:
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/test/CodeGen/AArch64/sve-fp.ll


Index: llvm/test/CodeGen/AArch64/sve-fp.ll
===================================================================

--- llvm/test/CodeGen/AArch64/sve-fp.ll
+++ llvm/test/CodeGen/AArch64/sve-fp.ll
@@ -312,6 +312,72 @@
   ret <vscale x 2 x double> %r
 }
 
+define <vscale x 8 x half> @fmsb_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: fmsb_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fmsb z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %neg = fneg <vscale x 8 x half> %b
+  %r = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %neg, <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %r
+}
+
+define <vscale x 4 x half> @fmsb_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x half> %c) {
+; CHECK-LABEL: fmsb_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmsb z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %neg = fneg <vscale x 4 x half> %b
+  %r = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %neg, <vscale x 4 x half> %c)
+  ret <vscale x 4 x half> %r
+}
+
+define <vscale x 2 x half> @fmsb_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x half> %c) {
+; CHECK-LABEL: fmsb_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmsb z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %neg = fneg <vscale x 2 x half> %b
+  %r = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %neg, <vscale x 2 x half> %c)
+  ret <vscale x 2 x half> %r
+}
+
+define <vscale x 4 x float> @fmsb_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: fmsb_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmsb z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %neg = fneg <vscale x 4 x float> %b
+  %r = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %neg, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %r
+}
+
+define <vscale x 2 x float> @fmsb_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x float> %c) {
+; CHECK-LABEL: fmsb_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmsb z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %neg = fneg <vscale x 2 x float> %b
+  %r = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %neg, <vscale x 2 x float> %c)
+  ret <vscale x 2 x float> %r
+}
+
+define <vscale x 2 x double> @fmsb_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: fmsb_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmsb z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %neg = fneg <vscale x 2 x double> %b
+  %r = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %neg, <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %r
+}
+
 define <vscale x 8 x half> @fneg_nxv8f16(<vscale x 8 x half> %a) {
 ; CHECK-LABEL: fneg_nxv8f16:
 ; CHECK:       // %bb.0:
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -546,6 +546,8 @@
     // Zd = Za + -Zn * Zm
     def : Pat<(Ty (AArch64fma_p PredTy:$P, (AArch64fneg_mt PredTy:$P, Ty:$Zn, (Ty (undef))), Ty:$Zm, Ty:$Za)),
               (!cast<Instruction>("FMLS_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
+    def : Pat<(Ty (AArch64fma_p PredTy:$P, Ty:$Zn, (AArch64fneg_mt PredTy:$P, Ty:$Zm, (Ty (undef))), Ty:$Za)),
+              (!cast<Instruction>("FMLS_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zm, ZPR:$Zn)>;
 
     // Zd = -Za + Zn * Zm
     def : Pat<(Ty (AArch64fma_p PredTy:$P, Ty:$Zn, Ty:$Zm, (AArch64fneg_mt PredTy:$P, Ty:$Za, (Ty (undef))))),


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D120570.411420.patch
Type: text/x-patch
Size: 4043 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220225/5909d207/attachment.bin>