[llvm-commits] [llvm] r148658 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSchedule.td lib/Target/ARM/ARMSub

Bruno Cardoso Lopes bruno.cardoso at gmail.com
Sun Jan 22 09:34:11 PST 2012


On Sun, Jan 22, 2012 at 10:07 AM, Anton Korobeynikov <asl at math.spbu.ru> wrote:
> Author: asl
> Date: Sun Jan 22 06:07:33 2012
> New Revision: 148658
>
> URL: http://llvm.org/viewvc/llvm-project?rev=148658&view=rev
> Log:
> Add fused multiply+add instructions from VFPv4.
> Patch by Ana Pazos!
>
> Added:
>    llvm/trunk/test/CodeGen/ARM/fusedMAC.ll
> Modified:
>    llvm/trunk/lib/Target/ARM/ARM.td
>    llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp
>    llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
>    llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
>    llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
>    llvm/trunk/lib/Target/ARM/ARMSchedule.td
>    llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
>    llvm/trunk/lib/Target/ARM/ARMSubtarget.h
>
> Modified: llvm/trunk/lib/Target/ARM/ARM.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARM.td?rev=148658&r1=148657&r2=148658&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARM.td (original)
> +++ llvm/trunk/lib/Target/ARM/ARM.td Sun Jan 22 06:07:33 2012
> @@ -32,9 +32,15 @@
>  def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
>                                    "Enable VFP3 instructions",
>                                    [FeatureVFP2]>;
> +def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
> +                                   "Enable VFP4 instructions",
> +                                   [FeatureVFP3]>;

You probably want "[FeatureVFP4]" here, right?

>  def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
>                                    "Enable NEON instructions",
>                                    [FeatureVFP3]>;
> +def FeatureNEONVFP4 : SubtargetFeature<"neon-vfpv4", "HasNEONVFPv4", "true",
> +                                       "Enable NEON-VFP4 instructions",
> +                                       [FeatureVFP4, FeatureNEON]>;
>  def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
>                                      "Enable Thumb2 instructions">;
>  def FeatureNoARM  : SubtargetFeature<"noarm", "NoARM", "true",
>
> Modified: llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp?rev=148658&r1=148657&r2=148658&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp (original)
> +++ llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp Sun Jan 22 06:07:33 2012
> @@ -719,15 +719,25 @@
>
>   if (Subtarget->hasNEON() && emitFPU) {
>     /* NEON is not exactly a VFP architecture, but GAS emit one of
> -     * neon/vfpv3/vfpv2 for .fpu parameters */
> -    AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
> +     * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
> +    if (Subtarget->hasNEONVFP4())
> +      AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon-vfpv4");
> +    else
> +     AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
>     /* If emitted for NEON, omit from VFP below, since you can have both
>      * NEON and VFP in build attributes but only one .fpu */
>     emitFPU = false;
>   }
>
> +  /* VFPv4 + .fpu */
> +  if (Subtarget->hasVFP4()) {
> +    AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
> +                               ARMBuildAttrs::AllowFPv4A);
> +    if (emitFPU)
> +      AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv4");
> +
>   /* VFPv3 + .fpu */
> -  if (Subtarget->hasVFP3()) {
> +  } else if (Subtarget->hasVFP3()) {
>     AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
>                                ARMBuildAttrs::AllowFPv3A);
>     if (emitFPU)
>
> Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=148658&r1=148657&r2=148658&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original)
> +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Sun Jan 22 06:07:33 2012
> @@ -179,8 +179,14 @@
>                                  AssemblerPredicate<"FeatureVFP2">;
>  def HasVFP3          : Predicate<"Subtarget->hasVFP3()">,
>                                  AssemblerPredicate<"FeatureVFP3">;
> +def HasVFP4          : Predicate<"Subtarget->hasVFP4()">,
> +                                 AssemblerPredicate<"FeatureVFP4">;
> +def NoVFP4            : Predicate<"!Subtarget->hasVFP4()">;
>  def HasNEON          : Predicate<"Subtarget->hasNEON()">,
>                                  AssemblerPredicate<"FeatureNEON">;
> +def HasNEONVFP4      : Predicate<"Subtarget->hasNEONVFP4()">,
> +                                 AssemblerPredicate<"FeatureNEONVFP4">;
> +def NoNEONVFP4       : Predicate<"!Subtarget->hasNEONVFP4()">;
>  def HasFP16          : Predicate<"Subtarget->hasFP16()">,
>                                  AssemblerPredicate<"FeatureFP16">;
>  def HasDivide        : Predicate<"Subtarget->hasDivide()">,
>
> Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=148658&r1=148657&r2=148658&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
> +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Sun Jan 22 06:07:33 2012
> @@ -3897,10 +3897,10 @@
>                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
>  def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
>                           v2f32, fmul_su, fadd_mlx>,
> -                Requires<[HasNEON, UseFPVMLx]>;
> +                Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
>  def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
>                           v4f32, fmul_su, fadd_mlx>,
> -                Requires<[HasNEON, UseFPVMLx]>;
> +                Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
>  defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
>                               IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
>  def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
> @@ -3955,10 +3955,10 @@
>                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
>  def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
>                           v2f32, fmul_su, fsub_mlx>,
> -                Requires<[HasNEON, UseFPVMLx]>;
> +                Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
>  def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
>                           v4f32, fmul_su, fsub_mlx>,
> -                Requires<[HasNEON, UseFPVMLx]>;
> +                Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
>  defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
>                               IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
>  def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
> @@ -4007,6 +4007,24 @@
>                             "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
>  defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
>
> +
> +// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
> +def  VFMAfd   : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
> +                          v2f32, fmul_su, fadd_mlx>,
> +                Requires<[HasNEONVFP4]>;
> +
> +def  VFMAfq   : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
> +                          v4f32, fmul_su, fadd_mlx>,
> +                Requires<[HasNEONVFP4]>;
> +
> +//   Fused Vector Multiply Subtract (floating-point)
> +def  VFMSfd   : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
> +                          v2f32, fmul_su, fsub_mlx>,
> +                Requires<[HasNEONVFP4]>;
> +def  VFMSfq   : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
> +                          v4f32, fmul_su, fsub_mlx>,
> +                Requires<[HasNEONVFP4]>;
> +
>  // Vector Subtract Operations.
>
>  //   VSUB     : Vector Subtract (integer and floating-point)
> @@ -5358,9 +5376,13 @@
>  def : N3VSPat<fsub, VSUBfd>;
>  def : N3VSPat<fmul, VMULfd>;
>  def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
> -      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
> +      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>;
>  def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
> -      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
> +      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>;
> +def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
> +      Requires<[HasNEONVFP4, UseNEONForFP]>;
> +def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
> +      Requires<[HasNEONVFP4, UseNEONForFP]>;
>  def : N2VSPat<fabs, VABSfd>;
>  def : N2VSPat<fneg, VNEGfd>;
>  def : N3VSPat<NEONfmax, VMAXfd>;
>
> Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=148658&r1=148657&r2=148658&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original)
> +++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Sun Jan 22 06:07:33 2012
> @@ -920,7 +920,7 @@
>                  [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
>                                           (f64 DPR:$Ddin)))]>,
>               RegConstraint<"$Ddin = $Dd">,
> -              Requires<[HasVFP2,UseFPVMLx]>;
> +              Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
>
>  def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
>                   (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
> @@ -928,7 +928,7 @@
>                   [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
>                                            SPR:$Sdin))]>,
>               RegConstraint<"$Sdin = $Sd">,
> -              Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
> +              Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
>   // Some single precision VFP instructions may be executed on both NEON and
>   // VFP pipelines on A8.
>   let D = VFPNeonA8Domain;
> @@ -936,10 +936,10 @@
>
>  def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
>           (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
> -          Requires<[HasVFP2,UseFPVMLx]>;
> +          Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
>  def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
>           (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
> -          Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>;
> +          Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,NoVFP4]>;
>
>  def VMLSD : ADbI<0b11100, 0b00, 1, 0,
>                  (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
> @@ -947,7 +947,7 @@
>                  [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
>                                           (f64 DPR:$Ddin)))]>,
>               RegConstraint<"$Ddin = $Dd">,
> -              Requires<[HasVFP2,UseFPVMLx]>;
> +              Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
>
>  def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
>                   (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
> @@ -955,7 +955,7 @@
>                   [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
>                                            SPR:$Sdin))]>,
>               RegConstraint<"$Sdin = $Sd">,
> -              Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
> +              Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
>   // Some single precision VFP instructions may be executed on both NEON and
>   // VFP pipelines on A8.
>   let D = VFPNeonA8Domain;
> @@ -963,10 +963,10 @@
>
>  def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
>           (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
> -          Requires<[HasVFP2,UseFPVMLx]>;
> +          Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
>  def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
>           (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
> -          Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
> +          Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>;
>
>  def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
>                   (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
> @@ -974,7 +974,7 @@
>                   [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
>                                           (f64 DPR:$Ddin)))]>,
>                 RegConstraint<"$Ddin = $Dd">,
> -                Requires<[HasVFP2,UseFPVMLx]>;
> +                Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
>
>  def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
>                   (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
> @@ -982,7 +982,7 @@
>                   [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
>                                            SPR:$Sdin))]>,
>                 RegConstraint<"$Sdin = $Sd">,
> -                Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
> +                Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
>   // Some single precision VFP instructions may be executed on both NEON and
>   // VFP pipelines on A8.
>   let D = VFPNeonA8Domain;
> @@ -990,10 +990,10 @@
>
>  def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
>           (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
> -          Requires<[HasVFP2,UseFPVMLx]>;
> +          Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
>  def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
>           (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
> -          Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
> +          Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>;
>
>  def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
>                   (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
> @@ -1001,14 +1001,14 @@
>                   [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
>                                            (f64 DPR:$Ddin)))]>,
>                RegConstraint<"$Ddin = $Dd">,
> -               Requires<[HasVFP2,UseFPVMLx]>;
> +               Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
>
>  def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
>                   (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
>                   IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
>              [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
>                          RegConstraint<"$Sdin = $Sd">,
> -                  Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
> +                  Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
>   // Some single precision VFP instructions may be executed on both NEON and
>   // VFP pipelines on A8.
>   let D = VFPNeonA8Domain;
> @@ -1016,11 +1016,116 @@
>
>  def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
>           (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
> -          Requires<[HasVFP2,UseFPVMLx]>;
> +          Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
>  def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
>           (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
> -          Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
> +          Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>;
>
> +//===----------------------------------------------------------------------===//
> +// Fused FP Multiply-Accumulate Operations.
> +//
> +def VFMAD : ADbI<0b11101, 0b10, 0, 0,
> +                 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
> +                 IIC_fpFMAC64, "vfma", ".f64\t$Dd, $Dn, $Dm",
> +                 [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
> +                                          (f64 DPR:$Ddin)))]>,
> +              RegConstraint<"$Ddin = $Dd">,
> +              Requires<[HasVFP4]>;
> +
> +def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
> +                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
> +                  IIC_fpFMAC32, "vfma", ".f32\t$Sd, $Sn, $Sm",
> +                  [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
> +                                           SPR:$Sdin))]>,
> +              RegConstraint<"$Sdin = $Sd">,
> +              Requires<[HasVFP4,DontUseNEONForFP]> {
> +  // Some single precision VFP instructions may be executed on both NEON and
> +  // VFP pipelines.
> +}
> +
> +def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
> +          (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
> +          Requires<[HasVFP4]>;
> +def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
> +          (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
> +          Requires<[HasVFP4,DontUseNEONForFP]>;
> +
> +def VFMSD : ADbI<0b11101, 0b10, 1, 0,
> +                 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
> +                 IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm",
> +                 [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
> +                                          (f64 DPR:$Ddin)))]>,
> +              RegConstraint<"$Ddin = $Dd">,
> +              Requires<[HasVFP4]>;
> +
> +def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
> +                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
> +                  IIC_fpFMAC32, "vfms", ".f32\t$Sd, $Sn, $Sm",
> +                  [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
> +                                           SPR:$Sdin))]>,
> +              RegConstraint<"$Sdin = $Sd">,
> +              Requires<[HasVFP4,DontUseNEONForFP]> {
> +  // Some single precision VFP instructions may be executed on both NEON and
> +  // VFP pipelines.
> +}
> +
> +def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
> +          (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
> +          Requires<[HasVFP4]>;
> +def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
> +          (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
> +          Requires<[HasVFP4,DontUseNEONForFP]>;
> +
> +def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
> +                  (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
> +                  IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm",
> +                  [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
> +                                          (f64 DPR:$Ddin)))]>,
> +                RegConstraint<"$Ddin = $Dd">,
> +                Requires<[HasVFP4]>;
> +
> +def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
> +                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
> +                  IIC_fpFMAC32, "vfnma", ".f32\t$Sd, $Sn, $Sm",
> +                  [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
> +                                           SPR:$Sdin))]>,
> +                RegConstraint<"$Sdin = $Sd">,
> +                Requires<[HasVFP4,DontUseNEONForFP]> {
> +  // Some single precision VFP instructions may be executed on both NEON and
> +  // VFP pipelines.
> +}
> +
> +def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
> +          (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
> +          Requires<[HasVFP4]>;
> +def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
> +          (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
> +          Requires<[HasVFP4,DontUseNEONForFP]>;
> +
> +def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
> +                  (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
> +                  IIC_fpFMAC64, "vfnms", ".f64\t$Dd, $Dn, $Dm",
> +                  [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
> +                                           (f64 DPR:$Ddin)))]>,
> +               RegConstraint<"$Ddin = $Dd">,
> +               Requires<[HasVFP4]>;
> +
> +def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
> +                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
> +                  IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
> +             [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
> +                         RegConstraint<"$Sdin = $Sd">,
> +                  Requires<[HasVFP4,DontUseNEONForFP]> {
> +  // Some single precision VFP instructions may be executed on both NEON and
> +  // VFP pipelines.
> +}
> +
> +def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
> +          (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
> +          Requires<[HasVFP4]>;
> +def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
> +          (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
> +          Requires<[HasVFP4,DontUseNEONForFP]>;
>
>  //===----------------------------------------------------------------------===//
>  // FP Conditional moves.
>
> Modified: llvm/trunk/lib/Target/ARM/ARMSchedule.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSchedule.td?rev=148658&r1=148657&r2=148658&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMSchedule.td (original)
> +++ llvm/trunk/lib/Target/ARM/ARMSchedule.td Sun Jan 22 06:07:33 2012
> @@ -118,6 +118,8 @@
>  def IIC_fpMUL64    : InstrItinClass;
>  def IIC_fpMAC32    : InstrItinClass;
>  def IIC_fpMAC64    : InstrItinClass;
> +def IIC_fpFMAC32   : InstrItinClass;
> +def IIC_fpFMAC64   : InstrItinClass;
>  def IIC_fpDIV32    : InstrItinClass;
>  def IIC_fpDIV64    : InstrItinClass;
>  def IIC_fpSQRT32   : InstrItinClass;
> @@ -208,6 +210,8 @@
>  def IIC_VPERMQ3    : InstrItinClass;
>  def IIC_VMACD      : InstrItinClass;
>  def IIC_VMACQ      : InstrItinClass;
> +def IIC_VFMACD     : InstrItinClass;
> +def IIC_VFMACQ     : InstrItinClass;
>  def IIC_VRECSD     : InstrItinClass;
>  def IIC_VRECSQ     : InstrItinClass;
>  def IIC_VCNTiD     : InstrItinClass;
>
> Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp?rev=148658&r1=148657&r2=148658&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp (original)
> +++ llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp Sun Jan 22 06:07:33 2012
> @@ -47,7 +47,9 @@
>   , HasV7Ops(false)
>   , HasVFPv2(false)
>   , HasVFPv3(false)
> +  , HasVFPv4(false)
>   , HasNEON(false)
> +  , HasNEONVFPv4(false)
>   , UseNEONForSinglePrecisionFP(false)
>   , SlowFPVMLx(false)
>   , HasVMLxForwarding(false)
>
> Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.h?rev=148658&r1=148657&r2=148658&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMSubtarget.h (original)
> +++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h Sun Jan 22 06:07:33 2012
> @@ -45,11 +45,13 @@
>   bool HasV6T2Ops;
>   bool HasV7Ops;
>
> -  /// HasVFPv2, HasVFPv3, HasNEON - Specify what floating point ISAs are
> -  /// supported.
> +  /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON, HasNEONVFPv4 - Specify what
> +  /// floating point ISAs are supported.
>   bool HasVFPv2;
>   bool HasVFPv3;
> +  bool HasVFPv4;
>   bool HasNEON;
> +  bool HasNEONVFPv4;
>
>   /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
>   /// specified. Use the method useNEONForSinglePrecisionFP() to
> @@ -197,7 +199,9 @@
>
>   bool hasVFP2() const { return HasVFPv2; }
>   bool hasVFP3() const { return HasVFPv3; }
> +  bool hasVFP4() const { return HasVFPv4; }
>   bool hasNEON() const { return HasNEON;  }
> +  bool hasNEONVFP4() const { return HasNEONVFPv4;  }
>   bool useNEONForSinglePrecisionFP() const {
>     return hasNEON() && UseNEONForSinglePrecisionFP; }
>
>
> Added: llvm/trunk/test/CodeGen/ARM/fusedMAC.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fusedMAC.ll?rev=148658&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/ARM/fusedMAC.ll (added)
> +++ llvm/trunk/test/CodeGen/ARM/fusedMAC.ll Sun Jan 22 06:07:33 2012
> @@ -0,0 +1,68 @@
> +; RUN: llc < %s -march=arm -mattr=+neon-vfpv4 | FileCheck %s
> +; Check generated fused MAC and MLS.
> +
> +define double @fusedMACTest1(double %d1, double %d2, double %d3) nounwind readnone noinline {
> +;CHECK: fusedMACTest1:
> +;CHECK: vfma.f64
> +  %1 = fmul double %d1, %d2
> +  %2 = fadd double %1, %d3
> +  ret double %2
> +}
> +
> +define float @fusedMACTest2(float %f1, float %f2, float %f3) nounwind readnone noinline {
> +;CHECK: fusedMACTest2:
> +;CHECK: vfma.f32
> +  %1 = fmul float %f1, %f2
> +  %2 = fadd float %1, %f3
> +  ret float %2
> +}
> +
> +define double @fusedMACTest3(double %d1, double %d2, double %d3) nounwind readnone noinline {
> +;CHECK: fusedMACTest3:
> +;CHECK: vfms.f64
> +  %1 = fmul double %d2, %d3
> +  %2 = fsub double %d1, %1
> +  ret double %2
> +}
> +
> +define float @fusedMACTest4(float %f1, float %f2, float %f3) nounwind readnone noinline {
> +;CHECK: fusedMACTest4:
> +;CHECK: vfms.f32
> +  %1 = fmul float %f2, %f3
> +  %2 = fsub float %f1, %1
> +  ret float %2
> +}
> +
> +define double @fusedMACTest5(double %d1, double %d2, double %d3) nounwind readnone noinline {
> +;CHECK: fusedMACTest5:
> +;CHECK: vfnma.f64
> +  %1 = fmul double %d1, %d2
> +  %2 = fsub double -0.0, %1
> +  %3 = fsub double %2, %d3
> +  ret double %3
> +}
> +
> +define float @fusedMACTest6(float %f1, float %f2, float %f3) nounwind {
> +;CHECK: fusedMACTest6:
> +;CHECK: vfnma.f32
> +  %1 = fmul float %f1, %f2
> +  %2 = fsub float -0.0, %1
> +  %3 = fsub float %2, %f3
> +  ret float %3
> +}
> +
> +define double @fusedMACTest7(double %d1, double %d2, double %d3) nounwind {
> +;CHECK: fusedMACTest7:
> +;CHECK: vfnms.f64
> +  %1 = fmul double %d1, %d2
> +  %2 = fsub double %1, %d3
> +  ret double %2
> +}
> +
> +define float @fusedMACTest8(float %f1, float %f2, float %f3) nounwind {
> +;CHECK: fusedMACTest8:
> +;CHECK: vfnms.f32
> +  %1 = fmul float %f1, %f2
> +  %2 = fsub float %1, %f3
> +  ret float %2
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits



-- 
Bruno Cardoso Lopes
http://www.brunocardoso.cc




More information about the llvm-commits mailing list