[llvm-commits] [llvm] r148658 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSchedule.td lib/Target/ARM/ARMSub

Sun Jan 22 16:46:31 PST 2012

On Sun, Jan 22, 2012 at 4:07 AM, Anton Korobeynikov <asl at math.spbu.ru> wrote:
> +//===----------------------------------------------------------------------===//
> +// Fused FP Multiply-Accumulate Operations.
> +//
> +def VFMAD : ADbI<0b11101, 0b10, 0, 0,
> +                 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
> +                 IIC_fpFMAC64, "vfma", ".f64\t$Dd, $Dn, $Dm",
> +                 [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
> +                                          (f64 DPR:$Ddin)))]>,
> +              RegConstraint<"$Ddin = $Dd">,
> +              Requires<[HasVFP4]>;
> +
> +def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
> +                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
> +                  IIC_fpFMAC32, "vfma", ".f32\t$Sd, $Sn, $Sm",
> +                  [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
> +                                           SPR:$Sdin))]>,
> +              RegConstraint<"$Sdin = $Sd">,
> +              Requires<[HasVFP4,DontUseNEONForFP]> {
> +  // Some single precision VFP instructions may be executed on both NEON and
> +  // VFP pipelines.
> +}
> +
> +def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
> +          (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
> +          Requires<[HasVFP4]>;
> +def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
> +          (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
> +          Requires<[HasVFP4,DontUseNEONForFP]>;
> +
> +def VFMSD : ADbI<0b11101, 0b10, 1, 0,
> +                 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
> +                 IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm",
> +                 [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
> +                                          (f64 DPR:$Ddin)))]>,
> +              RegConstraint<"$Ddin = $Dd">,
> +              Requires<[HasVFP4]>;
> +
> +def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
> +                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
> +                  IIC_fpFMAC32, "vfms", ".f32\t$Sd, $Sn, $Sm",
> +                  [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
> +                                           SPR:$Sdin))]>,
> +              RegConstraint<"$Sdin = $Sd">,
> +              Requires<[HasVFP4,DontUseNEONForFP]> {
> +  // Some single precision VFP instructions may be executed on both NEON and
> +  // VFP pipelines.
> +}
> +
> +def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
> +          (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
> +          Requires<[HasVFP4]>;
> +def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
> +          (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
> +          Requires<[HasVFP4,DontUseNEONForFP]>;
> +
> +def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
> +                  (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
> +                  IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm",
> +                  [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
> +                                          (f64 DPR:$Ddin)))]>,
> +                RegConstraint<"$Ddin = $Dd">,
> +                Requires<[HasVFP4]>;
> +
> +def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
> +                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
> +                  IIC_fpFMAC32, "vfnma", ".f32\t$Sd, $Sn, $Sm",
> +                  [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
> +                                           SPR:$Sdin))]>,
> +                RegConstraint<"$Sdin = $Sd">,
> +                Requires<[HasVFP4,DontUseNEONForFP]> {
> +  // Some single precision VFP instructions may be executed on both NEON and
> +  // VFP pipelines.
> +}
> +
> +def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
> +          (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
> +          Requires<[HasVFP4]>;
> +def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
> +          (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
> +          Requires<[HasVFP4,DontUseNEONForFP]>;
> +
> +def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
> +                  (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
> +                  IIC_fpFMAC64, "vfnms", ".f64\t$Dd, $Dn, $Dm",
> +                  [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
> +                                           (f64 DPR:$Ddin)))]>,
> +               RegConstraint<"$Ddin = $Dd">,
> +               Requires<[HasVFP4]>;
> +
> +def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
> +                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
> +                  IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
> +             [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
> +                         RegConstraint<"$Sdin = $Sd">,
> +                  Requires<[HasVFP4,DontUseNEONForFP]> {
> +  // Some single precision VFP instructions may be executed on both NEON and
> +  // VFP pipelines.
> +}
> +
> +def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
> +          (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
> +          Requires<[HasVFP4]>;
> +def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
> +          (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
> +          Requires<[HasVFP4,DontUseNEONForFP]>;

I'm a bit concerned about these patterns: a multiply followed by an
add is not, strictly speaking, the same thing as a fused multiply-add,
because the fused form rounds only once rather than after each step.
We have an FMA intrinsic (http://llvm.org/docs/LangRef.html#int_fma);
that should map onto this instruction, and we should only transform an
unfused multiply+add in fast-math mode.

-Eli