[PATCH] D142656: [SVE][codegen] Add pattern for SVE multiply-add accumulate

Fri Feb 3 03:41:36 PST 2023

SjoerdMeijer added a comment.

I experimented with replacing `AArch64mul_p_firstOpndWithSingleUse` with `AArch64mul_p_oneuse`:

  diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  index 96126b35c6a1..1cda0d41ac78 100644
  --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  @@ -408,7 +408,7 @@ def AArch64mla_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
                                 (add node:$op1, (vselect node:$pred, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op3), (SVEDup0)))]>;
   def AArch64mad_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
                                [(int_aarch64_sve_mad node:$pred, node:$op1, node:$op2, node:$op3),
  -                              (add node:$op3, (AArch64mul_p_firstOpndWithSingleUse node:$pred, node:$op1, node:$op2))]>;
  +                              (add node:$op3, (AArch64mul_p_oneuse node:$pred, node:$op1, node:$op2))]>;
   def AArch64mls_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
                                [(int_aarch64_sve_mls node:$pred, node:$op1, node:$op2, node:$op3),
                                 (sub node:$op1, (AArch64mul_p_oneuse node:$pred, node:$op2, node:$op3)),
  diff --git a/llvm/test/CodeGen/AArch64/sve-multiply-add-accumulate.ll b/llvm/test/CodeGen/AArch64/sve-multiply-add-accumulate.ll
  index b7ee8bfb25c5..51b8f1f129a4 100644
  --- a/llvm/test/CodeGen/AArch64/sve-multiply-add-accumulate.ll
  +++ b/llvm/test/CodeGen/AArch64/sve-multiply-add-accumulate.ll
  @@ -45,10 +45,11 @@ define <vscale x 8 x i16> @muladd_i16_test1(<vscale x 8 x i16> %a, <vscale x 8 x
   define <vscale x 8 x i16> @muladd_i16_test2(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
   ; CHECK-LABEL: muladd_i16_test2:
   ; CHECK:       // %bb.0:
  +; CHECK-NEXT:    mov w8, #200
  +; CHECK-NEXT:    mov z2.d, z0.d
   ; CHECK-NEXT:    ptrue p0.h
  -; CHECK-NEXT:    movprfx z2, z0
  -; CHECK-NEXT:    mul z2.h, p0/m, z2.h, z1.h
  -; CHECK-NEXT:    add z2.h, z2.h, #200 // =0xc8
  +; CHECK-NEXT:    mov z3.h, w8
  +; CHECK-NEXT:    mad z2.h, p0/m, z1.h, z3.h
   ; CHECK-NEXT:    mul z0.h, p0/m, z0.h, z2.h
   ; CHECK-NEXT:    sub z0.h, z0.h, z1.h
   ; CHECK-NEXT:    ret
  @@ -64,10 +65,12 @@ define <vscale x 8 x i16> @muladd_i16_test2(<vscale x 8 x i16> %a, <vscale x 8 x
   define <vscale x 8 x i16> @muladd_i16_test3(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
   ; CHECK-LABEL: muladd_i16_test3:
   ; CHECK:       // %bb.0:
  +; CHECK-NEXT:    mov w8, #200
  +; CHECK-NEXT:    mov z2.d, z0.d
   ; CHECK-NEXT:    ptrue p0.h
  -; CHECK-NEXT:    mul z1.h, p0/m, z1.h, z0.h
  -; CHECK-NEXT:    add z1.h, z1.h, #200 // =0xc8
  -; CHECK-NEXT:    mul z0.h, p0/m, z0.h, z1.h
  +; CHECK-NEXT:    mov z3.h, w8
  +; CHECK-NEXT:    mad z2.h, p0/m, z1.h, z3.h
  +; CHECK-NEXT:    mul z0.h, p0/m, z0.h, z2.h
   ; CHECK-NEXT:    ret
   {
     %1 = mul <vscale x 8 x i16> %a, %b

So it looks like we are generating more `mad`s, which is interesting, but I haven't checked whether that is correct. From a quick look, this looks sensible?
The next question is whether this is better...

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D142656/new/

https://reviews.llvm.org/D142656