[llvm-commits] [llvm] r148658 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSchedule.td lib/Target/ARM/ARMSubtarget.cpp lib/Target/ARM/ARMSubtarget.h test/CodeGen/ARM/fusedMAC.ll
Bruno Cardoso Lopes
bruno.cardoso at gmail.com
Sun Jan 22 09:34:11 PST 2012
On Sun, Jan 22, 2012 at 10:07 AM, Anton Korobeynikov <asl at math.spbu.ru> wrote:
> Author: asl
> Date: Sun Jan 22 06:07:33 2012
> New Revision: 148658
>
> URL: http://llvm.org/viewvc/llvm-project?rev=148658&view=rev
> Log:
> Add fused multiply+add instructions from VFPv4.
> Patch by Ana Pazos!
>
> Added:
> llvm/trunk/test/CodeGen/ARM/fusedMAC.ll
> Modified:
> llvm/trunk/lib/Target/ARM/ARM.td
> llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp
> llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
> llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
> llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
> llvm/trunk/lib/Target/ARM/ARMSchedule.td
> llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
> llvm/trunk/lib/Target/ARM/ARMSubtarget.h
>
> Modified: llvm/trunk/lib/Target/ARM/ARM.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARM.td?rev=148658&r1=148657&r2=148658&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARM.td (original)
> +++ llvm/trunk/lib/Target/ARM/ARM.td Sun Jan 22 06:07:33 2012
> @@ -32,9 +32,15 @@
> def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
> "Enable VFP3 instructions",
> [FeatureVFP2]>;
> +def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
> + "Enable VFP4 instructions",
> + [FeatureVFP3]>;
You probably want "[FeatureVFP4]" here, right?
> def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
> "Enable NEON instructions",
> [FeatureVFP3]>;
> +def FeatureNEONVFP4 : SubtargetFeature<"neon-vfpv4", "HasNEONVFPv4", "true",
> + "Enable NEON-VFP4 instructions",
> + [FeatureVFP4, FeatureNEON]>;
> def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
> "Enable Thumb2 instructions">;
> def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
>
> Modified: llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp?rev=148658&r1=148657&r2=148658&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp (original)
> +++ llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp Sun Jan 22 06:07:33 2012
> @@ -719,15 +719,25 @@
>
> if (Subtarget->hasNEON() && emitFPU) {
> /* NEON is not exactly a VFP architecture, but GAS emit one of
> - * neon/vfpv3/vfpv2 for .fpu parameters */
> - AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
> + * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
> + if (Subtarget->hasNEONVFP4())
> + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon-vfpv4");
> + else
> + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
> /* If emitted for NEON, omit from VFP below, since you can have both
> * NEON and VFP in build attributes but only one .fpu */
> emitFPU = false;
> }
>
> + /* VFPv4 + .fpu */
> + if (Subtarget->hasVFP4()) {
> + AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
> + ARMBuildAttrs::AllowFPv4A);
> + if (emitFPU)
> + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv4");
> +
> /* VFPv3 + .fpu */
> - if (Subtarget->hasVFP3()) {
> + } else if (Subtarget->hasVFP3()) {
> AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
> ARMBuildAttrs::AllowFPv3A);
> if (emitFPU)
>
> Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=148658&r1=148657&r2=148658&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original)
> +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Sun Jan 22 06:07:33 2012
> @@ -179,8 +179,14 @@
> AssemblerPredicate<"FeatureVFP2">;
> def HasVFP3 : Predicate<"Subtarget->hasVFP3()">,
> AssemblerPredicate<"FeatureVFP3">;
> +def HasVFP4 : Predicate<"Subtarget->hasVFP4()">,
> + AssemblerPredicate<"FeatureVFP4">;
> +def NoVFP4 : Predicate<"!Subtarget->hasVFP4()">;
> def HasNEON : Predicate<"Subtarget->hasNEON()">,
> AssemblerPredicate<"FeatureNEON">;
> +def HasNEONVFP4 : Predicate<"Subtarget->hasNEONVFP4()">,
> + AssemblerPredicate<"FeatureNEONVFP4">;
> +def NoNEONVFP4 : Predicate<"!Subtarget->hasNEONVFP4()">;
> def HasFP16 : Predicate<"Subtarget->hasFP16()">,
> AssemblerPredicate<"FeatureFP16">;
> def HasDivide : Predicate<"Subtarget->hasDivide()">,
>
> Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=148658&r1=148657&r2=148658&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
> +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Sun Jan 22 06:07:33 2012
> @@ -3897,10 +3897,10 @@
> IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
> def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
> v2f32, fmul_su, fadd_mlx>,
> - Requires<[HasNEON, UseFPVMLx]>;
> + Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
> def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
> v4f32, fmul_su, fadd_mlx>,
> - Requires<[HasNEON, UseFPVMLx]>;
> + Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
> defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
> IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
> def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
> @@ -3955,10 +3955,10 @@
> IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
> def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
> v2f32, fmul_su, fsub_mlx>,
> - Requires<[HasNEON, UseFPVMLx]>;
> + Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
> def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
> v4f32, fmul_su, fsub_mlx>,
> - Requires<[HasNEON, UseFPVMLx]>;
> + Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
> defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
> IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
> def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
> @@ -4007,6 +4007,24 @@
> "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
> defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
>
> +
> +// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
> +def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
> + v2f32, fmul_su, fadd_mlx>,
> + Requires<[HasNEONVFP4]>;
> +
> +def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
> + v4f32, fmul_su, fadd_mlx>,
> + Requires<[HasNEONVFP4]>;
> +
> +// Fused Vector Multiply Subtract (floating-point)
> +def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
> + v2f32, fmul_su, fsub_mlx>,
> + Requires<[HasNEONVFP4]>;
> +def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
> + v4f32, fmul_su, fsub_mlx>,
> + Requires<[HasNEONVFP4]>;
> +
> // Vector Subtract Operations.
>
> // VSUB : Vector Subtract (integer and floating-point)
> @@ -5358,9 +5376,13 @@
> def : N3VSPat<fsub, VSUBfd>;
> def : N3VSPat<fmul, VMULfd>;
> def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
> - Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
> + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>;
> def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
> - Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
> + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>;
> +def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
> + Requires<[HasNEONVFP4, UseNEONForFP]>;
> +def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
> + Requires<[HasNEONVFP4, UseNEONForFP]>;
> def : N2VSPat<fabs, VABSfd>;
> def : N2VSPat<fneg, VNEGfd>;
> def : N3VSPat<NEONfmax, VMAXfd>;
>
> Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=148658&r1=148657&r2=148658&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original)
> +++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Sun Jan 22 06:07:33 2012
> @@ -920,7 +920,7 @@
> [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
> (f64 DPR:$Ddin)))]>,
> RegConstraint<"$Ddin = $Dd">,
> - Requires<[HasVFP2,UseFPVMLx]>;
> + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
>
> def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
> (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
> @@ -928,7 +928,7 @@
> [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
> SPR:$Sdin))]>,
> RegConstraint<"$Sdin = $Sd">,
> - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
> + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
> // Some single precision VFP instructions may be executed on both NEON and
> // VFP pipelines on A8.
> let D = VFPNeonA8Domain;
> @@ -936,10 +936,10 @@
>
> def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
> (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
> - Requires<[HasVFP2,UseFPVMLx]>;
> + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
> def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
> (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
> - Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>;
> + Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,NoVFP4]>;
>
> def VMLSD : ADbI<0b11100, 0b00, 1, 0,
> (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
> @@ -947,7 +947,7 @@
> [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
> (f64 DPR:$Ddin)))]>,
> RegConstraint<"$Ddin = $Dd">,
> - Requires<[HasVFP2,UseFPVMLx]>;
> + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
>
> def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
> (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
> @@ -955,7 +955,7 @@
> [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
> SPR:$Sdin))]>,
> RegConstraint<"$Sdin = $Sd">,
> - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
> + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
> // Some single precision VFP instructions may be executed on both NEON and
> // VFP pipelines on A8.
> let D = VFPNeonA8Domain;
> @@ -963,10 +963,10 @@
>
> def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
> (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
> - Requires<[HasVFP2,UseFPVMLx]>;
> + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
> def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
> (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
> - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
> + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>;
>
> def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
> (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
> @@ -974,7 +974,7 @@
> [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
> (f64 DPR:$Ddin)))]>,
> RegConstraint<"$Ddin = $Dd">,
> - Requires<[HasVFP2,UseFPVMLx]>;
> + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
>
> def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
> (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
> @@ -982,7 +982,7 @@
> [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
> SPR:$Sdin))]>,
> RegConstraint<"$Sdin = $Sd">,
> - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
> + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
> // Some single precision VFP instructions may be executed on both NEON and
> // VFP pipelines on A8.
> let D = VFPNeonA8Domain;
> @@ -990,10 +990,10 @@
>
> def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
> (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
> - Requires<[HasVFP2,UseFPVMLx]>;
> + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
> def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
> (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
> - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
> + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>;
>
> def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
> (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
> @@ -1001,14 +1001,14 @@
> [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
> (f64 DPR:$Ddin)))]>,
> RegConstraint<"$Ddin = $Dd">,
> - Requires<[HasVFP2,UseFPVMLx]>;
> + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
>
> def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
> (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
> IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
> [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
> RegConstraint<"$Sdin = $Sd">,
> - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
> + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> {
> // Some single precision VFP instructions may be executed on both NEON and
> // VFP pipelines on A8.
> let D = VFPNeonA8Domain;
> @@ -1016,11 +1016,116 @@
>
> def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
> (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
> - Requires<[HasVFP2,UseFPVMLx]>;
> + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>;
> def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
> (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
> - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
> + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>;
>
> +//===----------------------------------------------------------------------===//
> +// Fused FP Multiply-Accumulate Operations.
> +//
> +def VFMAD : ADbI<0b11101, 0b10, 0, 0,
> + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
> + IIC_fpFMAC64, "vfma", ".f64\t$Dd, $Dn, $Dm",
> + [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
> + (f64 DPR:$Ddin)))]>,
> + RegConstraint<"$Ddin = $Dd">,
> + Requires<[HasVFP4]>;
> +
> +def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
> + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
> + IIC_fpFMAC32, "vfma", ".f32\t$Sd, $Sn, $Sm",
> + [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
> + SPR:$Sdin))]>,
> + RegConstraint<"$Sdin = $Sd">,
> + Requires<[HasVFP4,DontUseNEONForFP]> {
> + // Some single precision VFP instructions may be executed on both NEON and
> + // VFP pipelines.
> +}
> +
> +def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
> + (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
> + Requires<[HasVFP4]>;
> +def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
> + (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
> + Requires<[HasVFP4,DontUseNEONForFP]>;
> +
> +def VFMSD : ADbI<0b11101, 0b10, 1, 0,
> + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
> + IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm",
> + [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
> + (f64 DPR:$Ddin)))]>,
> + RegConstraint<"$Ddin = $Dd">,
> + Requires<[HasVFP4]>;
> +
> +def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
> + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
> + IIC_fpFMAC32, "vfms", ".f32\t$Sd, $Sn, $Sm",
> + [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
> + SPR:$Sdin))]>,
> + RegConstraint<"$Sdin = $Sd">,
> + Requires<[HasVFP4,DontUseNEONForFP]> {
> + // Some single precision VFP instructions may be executed on both NEON and
> + // VFP pipelines.
> +}
> +
> +def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
> + (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
> + Requires<[HasVFP4]>;
> +def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
> + (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
> + Requires<[HasVFP4,DontUseNEONForFP]>;
> +
> +def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
> + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
> + IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm",
> + [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
> + (f64 DPR:$Ddin)))]>,
> + RegConstraint<"$Ddin = $Dd">,
> + Requires<[HasVFP4]>;
> +
> +def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
> + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
> + IIC_fpFMAC32, "vfnma", ".f32\t$Sd, $Sn, $Sm",
> + [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
> + SPR:$Sdin))]>,
> + RegConstraint<"$Sdin = $Sd">,
> + Requires<[HasVFP4,DontUseNEONForFP]> {
> + // Some single precision VFP instructions may be executed on both NEON and
> + // VFP pipelines.
> +}
> +
> +def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
> + (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
> + Requires<[HasVFP4]>;
> +def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
> + (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
> + Requires<[HasVFP4,DontUseNEONForFP]>;
> +
> +def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
> + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
> + IIC_fpFMAC64, "vfnms", ".f64\t$Dd, $Dn, $Dm",
> + [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
> + (f64 DPR:$Ddin)))]>,
> + RegConstraint<"$Ddin = $Dd">,
> + Requires<[HasVFP4]>;
> +
> +def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
> + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
> + IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
> + [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
> + RegConstraint<"$Sdin = $Sd">,
> + Requires<[HasVFP4,DontUseNEONForFP]> {
> + // Some single precision VFP instructions may be executed on both NEON and
> + // VFP pipelines.
> +}
> +
> +def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
> + (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
> + Requires<[HasVFP4]>;
> +def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
> + (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
> + Requires<[HasVFP4,DontUseNEONForFP]>;
>
> //===----------------------------------------------------------------------===//
> // FP Conditional moves.
>
> Modified: llvm/trunk/lib/Target/ARM/ARMSchedule.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSchedule.td?rev=148658&r1=148657&r2=148658&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMSchedule.td (original)
> +++ llvm/trunk/lib/Target/ARM/ARMSchedule.td Sun Jan 22 06:07:33 2012
> @@ -118,6 +118,8 @@
> def IIC_fpMUL64 : InstrItinClass;
> def IIC_fpMAC32 : InstrItinClass;
> def IIC_fpMAC64 : InstrItinClass;
> +def IIC_fpFMAC32 : InstrItinClass;
> +def IIC_fpFMAC64 : InstrItinClass;
> def IIC_fpDIV32 : InstrItinClass;
> def IIC_fpDIV64 : InstrItinClass;
> def IIC_fpSQRT32 : InstrItinClass;
> @@ -208,6 +210,8 @@
> def IIC_VPERMQ3 : InstrItinClass;
> def IIC_VMACD : InstrItinClass;
> def IIC_VMACQ : InstrItinClass;
> +def IIC_VFMACD : InstrItinClass;
> +def IIC_VFMACQ : InstrItinClass;
> def IIC_VRECSD : InstrItinClass;
> def IIC_VRECSQ : InstrItinClass;
> def IIC_VCNTiD : InstrItinClass;
>
> Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp?rev=148658&r1=148657&r2=148658&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp (original)
> +++ llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp Sun Jan 22 06:07:33 2012
> @@ -47,7 +47,9 @@
> , HasV7Ops(false)
> , HasVFPv2(false)
> , HasVFPv3(false)
> + , HasVFPv4(false)
> , HasNEON(false)
> + , HasNEONVFPv4(false)
> , UseNEONForSinglePrecisionFP(false)
> , SlowFPVMLx(false)
> , HasVMLxForwarding(false)
>
> Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.h?rev=148658&r1=148657&r2=148658&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMSubtarget.h (original)
> +++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h Sun Jan 22 06:07:33 2012
> @@ -45,11 +45,13 @@
> bool HasV6T2Ops;
> bool HasV7Ops;
>
> - /// HasVFPv2, HasVFPv3, HasNEON - Specify what floating point ISAs are
> - /// supported.
> + /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON, HasNEONVFPv4 - Specify what
> + /// floating point ISAs are supported.
> bool HasVFPv2;
> bool HasVFPv3;
> + bool HasVFPv4;
> bool HasNEON;
> + bool HasNEONVFPv4;
>
> /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
> /// specified. Use the method useNEONForSinglePrecisionFP() to
> @@ -197,7 +199,9 @@
>
> bool hasVFP2() const { return HasVFPv2; }
> bool hasVFP3() const { return HasVFPv3; }
> + bool hasVFP4() const { return HasVFPv4; }
> bool hasNEON() const { return HasNEON; }
> + bool hasNEONVFP4() const { return HasNEONVFPv4; }
> bool useNEONForSinglePrecisionFP() const {
> return hasNEON() && UseNEONForSinglePrecisionFP; }
>
>
> Added: llvm/trunk/test/CodeGen/ARM/fusedMAC.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fusedMAC.ll?rev=148658&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/ARM/fusedMAC.ll (added)
> +++ llvm/trunk/test/CodeGen/ARM/fusedMAC.ll Sun Jan 22 06:07:33 2012
> @@ -0,0 +1,68 @@
> +; RUN: llc < %s -march=arm -mattr=+neon-vfpv4 | FileCheck %s
> +; Check generated fused MAC and MLS.
> +
> +define double @fusedMACTest1(double %d1, double %d2, double %d3) nounwind readnone noinline {
> +;CHECK: fusedMACTest1:
> +;CHECK: vfma.f64
> + %1 = fmul double %d1, %d2
> + %2 = fadd double %1, %d3
> + ret double %2
> +}
> +
> +define float @fusedMACTest2(float %f1, float %f2, float %f3) nounwind readnone noinline {
> +;CHECK: fusedMACTest2:
> +;CHECK: vfma.f32
> + %1 = fmul float %f1, %f2
> + %2 = fadd float %1, %f3
> + ret float %2
> +}
> +
> +define double @fusedMACTest3(double %d1, double %d2, double %d3) nounwind readnone noinline {
> +;CHECK: fusedMACTest3:
> +;CHECK: vfms.f64
> + %1 = fmul double %d2, %d3
> + %2 = fsub double %d1, %1
> + ret double %2
> +}
> +
> +define float @fusedMACTest4(float %f1, float %f2, float %f3) nounwind readnone noinline {
> +;CHECK: fusedMACTest4:
> +;CHECK: vfms.f32
> + %1 = fmul float %f2, %f3
> + %2 = fsub float %f1, %1
> + ret float %2
> +}
> +
> +define double @fusedMACTest5(double %d1, double %d2, double %d3) nounwind readnone noinline {
> +;CHECK: fusedMACTest5:
> +;CHECK: vfnma.f64
> + %1 = fmul double %d1, %d2
> + %2 = fsub double -0.0, %1
> + %3 = fsub double %2, %d3
> + ret double %3
> +}
> +
> +define float @fusedMACTest6(float %f1, float %f2, float %f3) nounwind {
> +;CHECK: fusedMACTest6:
> +;CHECK: vfnma.f32
> + %1 = fmul float %f1, %f2
> + %2 = fsub float -0.0, %1
> + %3 = fsub float %2, %f3
> + ret float %3
> +}
> +
> +define double @fusedMACTest7(double %d1, double %d2, double %d3) nounwind {
> +;CHECK: fusedMACTest7:
> +;CHECK: vfnms.f64
> + %1 = fmul double %d1, %d2
> + %2 = fsub double %1, %d3
> + ret double %2
> +}
> +
> +define float @fusedMACTest8(float %f1, float %f2, float %f3) nounwind {
> +;CHECK: fusedMACTest8:
> +;CHECK: vfnms.f32
> + %1 = fmul float %f1, %f2
> + %2 = fsub float %1, %f3
> + ret float %2
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
--
Bruno Cardoso Lopes
http://www.brunocardoso.cc
More information about the llvm-commits mailing list