[llvm] r292825 - [ARM] Classification Improvements to ARM Sched-Models. NFCI.
Javed Absar via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 23 12:20:39 PST 2017
Author: javed.absar
Date: Mon Jan 23 14:20:39 2017
New Revision: 292825
URL: http://llvm.org/viewvc/llvm-project?rev=292825&view=rev
Log:
[ARM] Classification Improvements to ARM Sched-Models. NFCI.
This is a series of patches to enable adding of machine sched
models for ARM processors easier and compact. They define new
sched-readwrites for groups of ARM instructions. This has been
missing so far, and as a consequence, machine scheduler models
for individual sub-targets have tended to be larger than they
needed to be.
The current patch focuses on floating-point instructions.
Reviewers: Diana Picus (rovka), Renato Golin (rengolin)
Differential Revision: https://reviews.llvm.org/D28194
Added:
llvm/trunk/test/CodeGen/ARM/misched-fp-basic.ll
Modified:
llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
llvm/trunk/lib/Target/ARM/ARMSchedule.td
llvm/trunk/lib/Target/ARM/ARMScheduleA9.td
llvm/trunk/lib/Target/ARM/ARMScheduleR52.td
llvm/trunk/lib/Target/ARM/ARMScheduleSwift.td
Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=292825&r1=292824&r2=292825&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Mon Jan 23 14:20:39 2017
@@ -336,13 +336,15 @@ let TwoOperandAliasConstraint = "$Dn = $
def VADDD : ADbI<0b11100, 0b11, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPALU64]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VADDS : ASbIn<0b11100, 0b11, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
+ [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]>,
+ Sched<[WriteFPALU32]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -352,19 +354,22 @@ let TwoOperandAliasConstraint = "$Sn = $
def VADDH : AHbI<0b11100, 0b11, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPALU32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VSUBD : ADbI<0b11100, 0b11, 1, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPALU64]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
+ [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]>,
+ Sched<[WriteFPALU32]>{
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -374,37 +379,43 @@ let TwoOperandAliasConstraint = "$Sn = $
def VSUBH : AHbI<0b11100, 0b11, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPALU32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VDIVD : ADbI<0b11101, 0b00, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPDIV64]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VDIVS : ASbI<0b11101, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>;
+ [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>,
+ Sched<[WriteFPDIV32]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VDIVH : AHbI<0b11101, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpDIV16, "vdiv", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPDIV32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VMULD : ADbI<0b11100, 0b10, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VMULS : ASbIn<0b11100, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
+ [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]>,
+ Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -414,17 +425,20 @@ let TwoOperandAliasConstraint = "$Sn = $
def VMULH : AHbI<0b11100, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL16, "vmul", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
def VNMULD : ADbI<0b11100, 0b10, 1, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>;
+ [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>,
+ Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>;
def VNMULS : ASbI<0b11100, 0b10, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
+ [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]>,
+ Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -433,7 +447,8 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
def VNMULH : AHbI<0b11100, 0b10, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL16, "vnmul", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
multiclass vsel_inst<string op, bits<2> opc, int CC> {
let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
@@ -624,7 +639,8 @@ def VCMPZH : AHuI<0b11101, 0b11, 0b0101
def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
- [(set DPR:$Dd, (fpextend SPR:$Sm))]> {
+ [(set DPR:$Dd, (fpextend SPR:$Sm))]>,
+ Sched<[WriteFPCVT]> {
// Instruction operands.
bits<5> Dd;
bits<5> Sm;
@@ -641,7 +657,8 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111
// Special case encoding: bits 11-8 is 0b1011.
def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (fpround DPR:$Dm))]> {
+ [(set SPR:$Sd, (fpround DPR:$Dm))]>,
+ Sched<[WriteFPCVT]> {
// Instruction operands.
bits<5> Sd;
bits<5> Dm;
@@ -667,27 +684,32 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[HasFP16]>;
+ Requires<[HasFP16]>,
+ Sched<[WriteFPCVT]>;
def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[HasFP16]>;
+ Requires<[HasFP16]>,
+ Sched<[WriteFPCVT]>;
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[HasFP16]>;
+ Requires<[HasFP16]>,
+ Sched<[WriteFPCVT]>;
def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[HasFP16]>;
+ Requires<[HasFP16]>,
+ Sched<[WriteFPCVT]>;
def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm",
- []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]>,
+ Sched<[WriteFPCVT]> {
// Instruction operands.
bits<5> Sm;
@@ -946,12 +968,14 @@ defm VRINTM : vrint_inst_anpm<"m", 0b11,
def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
- [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPSQRT64]>;
def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
- [(set SPR:$Sd, (fsqrt SPR:$Sm))]>;
+ [(set SPR:$Sd, (fsqrt SPR:$Sm))]>,
+ Sched<[WriteFPSQRT32]>;
def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
@@ -987,7 +1011,8 @@ def VINSH : ASuInp<0b11101, 0b11, 0b000
def VMOVRS : AVConv2I<0b11100001, 0b1010,
(outs GPR:$Rt), (ins SPR:$Sn),
IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
- [(set GPR:$Rt, (bitconvert SPR:$Sn))]> {
+ [(set GPR:$Rt, (bitconvert SPR:$Sn))]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<4> Rt;
bits<5> Sn;
@@ -1010,7 +1035,8 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010
(outs SPR:$Sn), (ins GPR:$Rt),
IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
[(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
- Requires<[HasVFP2, UseVMOVSR]> {
+ Requires<[HasVFP2, UseVMOVSR]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Sn;
bits<4> Rt;
@@ -1032,7 +1058,8 @@ let hasSideEffects = 0 in {
def VMOVRRD : AVConv3I<0b11000101, 0b1011,
(outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
- [/* FIXME: Can't write pattern for multiple result instr*/]> {
+ [/* FIXME: Can't write pattern for multiple result instr*/]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Dm;
bits<4> Rt;
@@ -1059,7 +1086,8 @@ def VMOVRRD : AVConv3I<0b11000101, 0b10
def VMOVRRS : AVConv3I<0b11000101, 0b1010,
(outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2),
IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2",
- [/* For disassembly only; pattern left blank */]> {
+ [/* For disassembly only; pattern left blank */]>,
+ Sched<[WriteFPMOV]> {
bits<5> src1;
bits<4> Rt;
bits<4> Rt2;
@@ -1085,7 +1113,8 @@ def VMOVRRS : AVConv3I<0b11000101, 0b10
def VMOVDRR : AVConv5I<0b11000100, 0b1011,
(outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
- [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]> {
+ [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Dm;
bits<4> Rt;
@@ -1128,7 +1157,8 @@ let hasSideEffects = 0 in
def VMOVSRR : AVConv5I<0b11000100, 0b1010,
(outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
- [/* For disassembly only; pattern left blank */]> {
+ [/* For disassembly only; pattern left blank */]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> dst1;
bits<4> src1;
@@ -1154,7 +1184,8 @@ def VMOVRH : AVConv2I<0b11100001, 0b1001
(outs GPR:$Rt), (ins SPR:$Sn),
IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn",
[]>,
- Requires<[HasFullFP16]> {
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<4> Rt;
bits<5> Sn;
@@ -1173,7 +1204,8 @@ def VMOVHR : AVConv4I<0b11100000, 0b1001
(outs SPR:$Sn), (ins GPR:$Rt),
IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt",
[]>,
- Requires<[HasFullFP16]> {
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Sn;
bits<4> Rt;
@@ -1254,7 +1286,8 @@ class AVConv1IHs_Encode<bits<5> opcod1,
def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // s32
}
@@ -1269,7 +1302,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
(outs SPR:$Sd),(ins SPR:$Sm),
IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // s32
// Some single precision VFP instructions may be executed on both NEON and
@@ -1286,14 +1320,16 @@ def : VFPNoNEONPat<(f32 (sint_to_fp (i32
def VSITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTIH, "vcvt", ".f16.s32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // s32
}
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // u32
}
@@ -1308,7 +1344,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // u32
// Some single precision VFP instructions may be executed on both NEON and
@@ -1325,7 +1362,8 @@ def : VFPNoNEONPat<(f32 (uint_to_fp (i32
def VUITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTIH, "vcvt", ".f16.u32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // u32
}
@@ -1390,7 +1428,8 @@ class AVConv1IsH_Encode<bits<5> opcod1,
def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
}
@@ -1405,7 +1444,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
// Some single precision VFP instructions may be executed on both NEON and
@@ -1423,14 +1463,16 @@ def : VFPNoNEONPat<(alignedstore32 (i32
def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTHI, "vcvt", ".s32.f16\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
}
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
}
@@ -1445,7 +1487,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
// Some single precision VFP instructions may be executed on both NEON and
@@ -1463,7 +1506,8 @@ def : VFPNoNEONPat<(alignedstore32 (i32
def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTHI, "vcvt", ".u32.f16\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
}
@@ -1473,42 +1517,48 @@ let Uses = [FPSCR] in {
def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>{
+ [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm",
- [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]> {
+ [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOSIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTHI, "vcvtr", ".s32.f16\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>{
+ [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm",
- [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> {
+ [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTHI, "vcvtr", ".u32.f16\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
}
@@ -1528,8 +1578,7 @@ let Constraints = "$a = $dst" in {
class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
bit op5, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>,
- Sched<[WriteCvtFP]> {
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
bits<5> dst;
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
let Inst{22} = dst{0};
@@ -1540,8 +1589,7 @@ class AVConv1XInsS_Encode<bits<5> op1, b
class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
bit op5, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>,
- Sched<[WriteCvtFP]> {
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
bits<5> dst;
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
let Inst{22} = dst{4};
@@ -1553,26 +1601,31 @@ class AVConv1XInsD_Encode<bits<5> op1, b
def VTOSHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTHI, "vcvt", ".s16.f16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VTOUHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTHI, "vcvt", ".u16.f16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VTOSLH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTHI, "vcvt", ".s32.f16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VTOULH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTHI, "vcvt", ".u32.f16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
- IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> {
+ IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1604,45 +1657,54 @@ def VTOULS : AVConv1XInsS_Encode<0b11101
def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
- IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>;
+ IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
- IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>;
+ IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
- IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>;
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
- IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>;
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
// Fixed-Point to FP:
def VSHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTIH, "vcvt", ".f16.s16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VUHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTIH, "vcvt", ".f16.u16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VSLTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTIH, "vcvt", ".f16.s32\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VULTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTIH, "vcvt", ".f16.u32\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> {
+ IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1650,7 +1712,8 @@ def VSHTOS : AVConv1XInsS_Encode<0b11101
def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> {
+ IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1658,7 +1721,8 @@ def VUHTOS : AVConv1XInsS_Encode<0b11101
def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> {
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1666,7 +1730,8 @@ def VSLTOS : AVConv1XInsS_Encode<0b11101
def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> {
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1674,19 +1739,23 @@ def VULTOS : AVConv1XInsS_Encode<0b11101
def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>;
+ IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>;
+ IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>;
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>;
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
} // End of 'let Constraints = "$a = $dst" in'
@@ -1700,7 +1769,8 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1708,7 +1778,8 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1734,7 +1805,8 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1742,7 +1814,8 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1768,7 +1841,8 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1776,7 +1850,8 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1802,14 +1877,16 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1838,7 +1915,8 @@ def VFMAD : ADbI<0b11101, 0b10, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1846,7 +1924,8 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines.
}
@@ -1856,7 +1935,8 @@ def VFMAH : AHbI<0b11101, 0b10, 0, 0,
IIC_fpFMAC16, "vfma", ".f16\t$Sd, $Sn, $Sm",
[]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFusedMAC]>;
+ Requires<[HasFullFP16,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
Modified: llvm/trunk/lib/Target/ARM/ARMSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSchedule.td?rev=292825&r1=292824&r2=292825&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSchedule.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMSchedule.td Mon Jan 23 14:20:39 2017
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Instruction scheduling annotations for out-of-order CPUs.
+// Instruction scheduling annotations for in-order and out-of-order CPUs.
// These annotations are independent of the itinerary class defined below.
// Here we define the subtarget independent read/write per-operand resources.
// The subtarget schedule definitions will then map these to the subtarget's
@@ -54,6 +54,9 @@
// }
// def : ReadAdvance<ReadAdvanceALUsr, 3>;
+//===----------------------------------------------------------------------===//
+// Sched definitions for integer pipeline instructions
+//
// Basic ALU operation.
def WriteALU : SchedWrite;
def ReadALU : SchedRead;
@@ -81,12 +84,38 @@ def WriteBr : SchedWrite;
def WriteBrL : SchedWrite;
def WriteBrTbl : SchedWrite;
-// Fixpoint conversions.
-def WriteCvtFP : SchedWrite;
-
// Noop.
def WriteNoop : SchedWrite;
+//===----------------------------------------------------------------------===//
+// Sched definitions for floating-point and neon instructions
+//
+// Floating point conversions
+def WriteFPCVT : SchedWrite;
+def WriteFPMOV : SchedWrite; // FP -> GPR and vice-versa
+
+// ALU operations (32/64-bit)
+def WriteFPALU32 : SchedWrite;
+def WriteFPALU64 : SchedWrite;
+
+// Multiplication
+def WriteFPMUL32 : SchedWrite;
+def WriteFPMUL64 : SchedWrite;
+def ReadFPMUL : SchedRead; // multiplier read
+def ReadFPMAC : SchedRead; // accumulator read
+
+// Multiply-accumulate
+def WriteFPMAC32 : SchedWrite;
+def WriteFPMAC64 : SchedWrite;
+
+// Division
+def WriteFPDIV32 : SchedWrite;
+def WriteFPDIV64 : SchedWrite;
+
+// Square-root
+def WriteFPSQRT32 : SchedWrite;
+def WriteFPSQRT64 : SchedWrite;
+
// Define TII for use in SchedVariant Predicates.
def : PredicateProlog<[{
const ARMBaseInstrInfo *TII =
Modified: llvm/trunk/lib/Target/ARM/ARMScheduleA9.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleA9.td?rev=292825&r1=292824&r2=292825&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleA9.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleA9.td Mon Jan 23 14:20:39 2017
@@ -2471,6 +2471,33 @@ def : SchedAlias<WriteALUsr, A9WriteALUs
def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
def : SchedAlias<ReadALU, A9ReadALU>;
def : SchedAlias<ReadALUsr, A9ReadALU>;
+
+// ===---------------------------------------------------------------------===//
+// Floating-point. Map target defined SchedReadWrite to processor specific ones
+//
+def : WriteRes<WriteFPCVT, [A9UnitFP, A9UnitAGU]> { let Latency = 4; }
+def : SchedAlias<WriteFPMOV, A9WriteFMov>;
+
+def : SchedAlias<WriteFPALU32, A9WriteF>;
+def : SchedAlias<WriteFPALU64, A9WriteF>;
+
+def : SchedAlias<WriteFPMUL32, A9WriteFMulS>;
+def : SchedAlias<WriteFPMUL64, A9WriteFMulD>;
+
+def : SchedAlias<WriteFPMAC32, A9WriteFMAS>;
+def : SchedAlias<WriteFPMAC64, A9WriteFMAD>;
+
+def : SchedAlias<WriteFPDIV32, A9WriteFDivS>;
+def : SchedAlias<WriteFPDIV64, A9WriteFDivD>;
+def : SchedAlias<WriteFPSQRT32, A9WriteFSqrtS>;
+def : SchedAlias<WriteFPSQRT64, A9WriteFSqrtD>;
+
+def : ReadAdvance<ReadFPMUL, 0>;
+def : ReadAdvance<ReadFPMAC, 0>;
+
+// ===---------------------------------------------------------------------===//
+// Subtarget-specific overrides. Map opcodes to list of SchedReadWrite types.
+//
def : InstRW< [WriteALU],
(instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr",
"BICrr")>;
@@ -2524,6 +2551,5 @@ def : WriteRes<WriteBr, [A9UnitB]>;
def : WriteRes<WriteBrL, [A9UnitB]>;
def : WriteRes<WriteBrTbl, [A9UnitB]>;
def : WriteRes<WritePreLd, []>;
-def : SchedAlias<WriteCvtFP, A9WriteF>;
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
} // SchedModel = CortexA9Model
Modified: llvm/trunk/lib/Target/ARM/ARMScheduleR52.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleR52.td?rev=292825&r1=292824&r2=292825&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleR52.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleR52.td Mon Jan 23 14:20:39 2017
@@ -86,12 +86,45 @@ def : WriteRes<WriteBrTbl, [R52UnitALU]>
// Misc
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
-def : WriteRes<WriteCvtFP, [R52UnitALU]> { let Latency = 3; }
+// Integer pipeline by-passes
def : ReadAdvance<ReadALU, 1>; // Operand needed in EX1 stage
def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
+// Floating-point. Map target-defined SchedReadWrites to subtarget
+def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]> { let Latency = 6; }
+
+def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]> {
+ let Latency = 6;
+}
+
+def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]> {
+ let Latency = 11; // as it is internally two insns (MUL then ADD)
+}
+
+def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
+ R52UnitFPALU, R52UnitFPALU]> {
+ let Latency = 11;
+}
+
+def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
+ let Latency = 7; // FP div takes fixed #cycles
+ let ResourceCycles = [7]; // is not pipelined
+}
+
+def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
+ let Latency = 17;
+ let ResourceCycles = [17];
+}
+
+def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; }
+def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; }
+
+def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
+def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1
+
+
//===----------------------------------------------------------------------===//
// Subtarget-specific SchedReadWrites.
@@ -147,19 +180,17 @@ def R52Write2FPMAC_F5 : SchedWriteRes<[
def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
-def R52WriteFPDIV_SP : SchedWriteRes<[R52UnitFPDIV]> {
- let Latency = 7; // FP div takes fixed #cycles
- let ResourceCycles = [7]; // is not pipelined
- }
-def R52WriteFPDIV_DP : SchedWriteRes<[R52UnitFPDIV]> {
- let Latency = 17;
- let ResourceCycles = [17];
-}
-
-
//===----------------------------------------------------------------------===//
-// Subtarget-specific - map operands to SchedReadWrites
+// Floating-point. Map target defined SchedReadWrites to processor specific ones
+//
+def : SchedAlias<WriteFPCVT, R52WriteFPALU_F5>;
+def : SchedAlias<WriteFPMOV, R52WriteFPALU_F3>;
+def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
+def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;
+//===----------------------------------------------------------------------===//
+// Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types.
+//
def : InstRW<[WriteALU], (instrs COPY)>;
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
@@ -492,12 +523,6 @@ def : InstRW<[R52Write2FPALU_F3, R52Read
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;
-def : InstRW<[R52WriteFPDIV_SP, R52Read_F0, R52Read_F0], (instregex "VDIV(S|H)")>;
-def : InstRW<[R52WriteFPDIV_DP, R52Read_F0, R52Read_F0], (instregex "VDIVD")>;
-
-def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
- (instregex "(VFMA|VFMS|VFNMA|VFNMS)(D|H|S)")>;
-
def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;
@@ -777,9 +802,8 @@ def : InstRW<[R52Write2FPALU_F4, R52Read
def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
-def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VMOV", "VORR", "VORN", "VREV")>;
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VORR", "VORN", "VREV")>;
def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;
-def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VMUL", "VNMUL", "VMLA")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>;
Modified: llvm/trunk/lib/Target/ARM/ARMScheduleSwift.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleSwift.td?rev=292825&r1=292824&r2=292825&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleSwift.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleSwift.td Mon Jan 23 14:20:39 2017
@@ -597,8 +597,6 @@ let SchedModel = SwiftModel in {
def : InstRW<[SwiftWriteP1FourCycle],
(instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH",
"VMULL", "VQDMULL")>;
- def : InstRW<[SwiftWriteP1SixCycle],
- (instregex "VMULD", "VNMULD")>;
def : InstRW<[SwiftWriteP1FourCycle],
(instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)",
"VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>;
@@ -607,8 +605,6 @@ let SchedModel = SwiftModel in {
// 4.2.36 Advanced SIMD and VFP, Convert
def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>;
- // Fixpoint conversions.
- def : WriteRes<WriteCvtFP, [SwiftUnitP1]> { let Latency = 4; }
// 4.2.37 Advanced SIMD and VFP, Move
def : InstRW<[SwiftWriteP0TwoCycle],
@@ -1036,6 +1032,30 @@ let SchedModel = SwiftModel in {
def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>;
def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>;
+ // ===---------------------------------------------------------------------===//
+ // Floating-point. Map target defined SchedReadWrite to processor specific ones
+ //
+ def : SchedAlias<WriteFPCVT, SwiftWriteP1FourCycle>;
+ def : SchedAlias<WriteFPMOV, SwiftWriteP2ThreeCycle>;
+
+ def : SchedAlias<WriteFPALU32, SwiftWriteP0FourCycle>;
+ def : SchedAlias<WriteFPALU64, SwiftWriteP0SixCycle>;
+
+ def : SchedAlias<WriteFPMUL32, SwiftWriteP1FourCycle>;
+ def : SchedAlias<WriteFPMUL64, SwiftWriteP1SixCycle>;
+
+ def : SchedAlias<WriteFPMAC32, SwiftWriteP1FourCycle>;
+ def : SchedAlias<WriteFPMAC64, SwiftWriteP1FourCycle>;
+
+ def : SchedAlias<WriteFPDIV32, SwiftDiv17>;
+ def : SchedAlias<WriteFPSQRT32, SwiftDiv17>;
+
+ def : SchedAlias<WriteFPDIV64, SwiftDiv32>;
+ def : SchedAlias<WriteFPSQRT64, SwiftDiv32>;
+
+ def : ReadAdvance<ReadFPMUL, 0>;
+ def : ReadAdvance<ReadFPMAC, 0>;
+
// Not specified.
def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
// Preload.
Added: llvm/trunk/test/CodeGen/ARM/misched-fp-basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/misched-fp-basic.ll?rev=292825&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/misched-fp-basic.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/misched-fp-basic.ll Mon Jan 23 14:20:39 2017
@@ -0,0 +1,69 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a9 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \
+; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_A9
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=swift -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \
+; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_SWIFT
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-r52 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \
+; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_R52
+;
+; Check the latency of instructions for processors with sched-models
+;
+; Function Attrs: norecurse nounwind readnone
+define i32 @foo(float %a, float %b, float %c, i32 %d) local_unnamed_addr #0 {
+entry:
+;
+; CHECK: ********** MI Scheduling **********
+; CHECK_A9: VADDS
+; CHECK_SWIFT: VADDfd
+; CHECK_R52: VADDS
+; CHECK_A9: Latency : 5
+; CHECK_SWIFT: Latency : 4
+; CHECK_R52: Latency : 6
+;
+; CHECK_A9: VMULS
+; CHECK_SWIFT: VMULfd
+; CHECK_R52: VMULS
+; CHECK_SWIFT: Latency : 4
+; CHECK_A9: Latency : 6
+; CHECK_R52: Latency : 6
+;
+; CHECK: VDIVS
+; CHECK_SWIFT: Latency : 17
+; CHECK_A9: Latency : 16
+; CHECK_R52: Latency : 7
+;
+; CHECK: VCVTDS
+; CHECK_SWIFT: Latency : 4
+; CHECK_A9: Latency : 5
+; CHECK_R52: Latency : 6
+;
+; CHECK: VADDD
+; CHECK_SWIFT: Latency : 6
+; CHECK_A9: Latency : 5
+; CHECK_R52: Latency : 6
+;
+; CHECK: VMULD
+; CHECK_SWIFT: Latency : 6
+; CHECK_A9: Latency : 7
+; CHECK_R52: Latency : 6
+;
+; CHECK: VDIVD
+; CHECK_SWIFT: Latency : 32
+; CHECK_A9: Latency : 26
+; CHECK_R52: Latency : 17
+;
+; CHECK: VTOSIZD
+; CHECK_SWIFT: Latency : 4
+; CHECK_A9: Latency : 5
+; CHECK_R52: Latency : 6
+;
+ %add = fadd float %a, %b
+ %mul = fmul float %add, %add
+ %div = fdiv float %mul, %b
+ %conv1 = fpext float %div to double
+ %add3 = fadd double %conv1, %conv1
+ %mul4 = fmul double %add3, %add3
+ %div5 = fdiv double %mul4, %conv1
+ %conv6 = fptosi double %div5 to i32
+ ret i32 %conv6
+}
More information about the llvm-commits
mailing list