[llvm] r303717 - [ARM] Add VLDx/VSTx sched defs for machine-schedulers. NFCI
Javed Absar via llvm-commits
llvm-commits at lists.llvm.org
Tue May 23 22:32:50 PDT 2017
Author: javed.absar
Date: Wed May 24 00:32:48 2017
New Revision: 303717
URL: http://llvm.org/viewvc/llvm-project?rev=303717&view=rev
Log:
[ARM] Add VLDx/VSTx sched defs for machine-schedulers. NFCI
This patch adds missing scheds for Neon VLDx/VSTx instructions.
This will help one write schedulers easier/faster in the future for ARM sub-targets.
Existing models will not affected by this patch.
Reviewed by: Renato Golin, Diana Picus
Differential Revision: https://reviews.llvm.org/D33120
Modified:
llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
llvm/trunk/lib/Target/ARM/ARMSchedule.td
llvm/trunk/lib/Target/ARM/ARMScheduleA9.td
llvm/trunk/lib/Target/ARM/ARMScheduleR52.td
llvm/trunk/lib/Target/ARM/ARMScheduleSwift.td
Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=303717&r1=303716&r2=303717&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed May 24 00:32:48 2017
@@ -674,7 +674,7 @@ let mayLoad = 1, hasSideEffects = 0, has
class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
(ins AddrMode:$Rn), IIC_VLD1,
- "vld1", Dt, "$Vd, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -682,7 +682,7 @@ class VLD1D<bits<4> op7_4, string Dt, Op
class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
(ins AddrMode:$Rn), IIC_VLD1x2,
- "vld1", Dt, "$Vd, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -703,7 +703,7 @@ multiclass VLD1DWB<bits<4> op7_4, string
def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
(ins AddrMode:$Rn), IIC_VLD1u,
"vld1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -711,7 +711,7 @@ multiclass VLD1DWB<bits<4> op7_4, string
def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
(ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
"vld1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -720,7 +720,7 @@ multiclass VLD1QWB<bits<4> op7_4, string
def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
(ins AddrMode:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -728,7 +728,7 @@ multiclass VLD1QWB<bits<4> op7_4, string
def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
(ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -747,7 +747,7 @@ defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64"
class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
(ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
- "$Vd, $Rn", "", []> {
+ "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -756,7 +756,7 @@ multiclass VLD1D3WB<bits<4> op7_4, strin
def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
(ins AddrMode:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -764,7 +764,7 @@ multiclass VLD1D3WB<bits<4> op7_4, strin
def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
(ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -780,15 +780,15 @@ defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "1
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;
-def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
-def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>;
-def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>;
+def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
(ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
- "$Vd, $Rn", "", []> {
+ "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -797,7 +797,7 @@ multiclass VLD1D4WB<bits<4> op7_4, strin
def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
(ins AddrMode:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -805,7 +805,7 @@ multiclass VLD1D4WB<bits<4> op7_4, strin
def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
(ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -821,9 +821,9 @@ defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
-def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
-def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>;
-def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>;
+def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
@@ -837,22 +837,22 @@ class VLD2<bits<4> op11_8, bits<4> op7_4
}
def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVLD4]>;
-def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
-def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
-def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
+def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
+def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
@@ -875,45 +875,45 @@ multiclass VLD2WB<bits<4> op11_8, bits<4
}
defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVLD4]>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVLD4]>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVLD4]>;
-def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
-def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
-def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
-def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
-def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
-def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
+def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
+def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
+def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
+def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
+def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
// ...with double-spaced registers
def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVLD2]>;
// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
(ins addrmode6:$Rn), IIC_VLD3,
- "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+ "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST3Instruction";
@@ -923,9 +923,9 @@ def VLD3d8 : VLD3D<0b0100, {0,0,0,?},
def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;
-def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>;
-def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
-def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
+def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
+def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
+def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -933,7 +933,7 @@ class VLD3DWB<bits<4> op11_8, bits<4> op
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
"vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST3Instruction";
}
@@ -942,9 +942,9 @@ def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
-def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
-def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
-def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
+def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
+def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
// ...with double-spaced registers:
def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">;
@@ -954,25 +954,26 @@ def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
-def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
-def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
-def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
+def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
+def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
// ...alternate versions to be allocated odd register numbers:
-def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
-def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
-def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
-
-def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
-def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
-def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
+def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
+def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
+
+def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
+def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
+def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6:$Rn), IIC_VLD4,
- "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>,
+ Sched<[WriteVLD4]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST4Instruction";
@@ -982,9 +983,9 @@ def VLD4d8 : VLD4D<0b0000, {0,0,?,?},
def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;
-def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>;
-def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
-def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
+def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
+def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
+def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -992,7 +993,7 @@ class VLD4DWB<bits<4> op11_8, bits<4> op
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
"vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST4Instruction";
}
@@ -1001,9 +1002,9 @@ def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?
def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
-def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
-def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
-def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
+def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
+def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
// ...with double-spaced registers:
def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">;
@@ -1013,18 +1014,18 @@ def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?
def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
-def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
-def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
-def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
+def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
+def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
// ...alternate versions to be allocated odd register numbers:
-def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
-def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
-def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
-
-def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
-def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
-def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
+def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
+def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
+
+def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
+def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
+def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
@@ -1076,11 +1077,12 @@ class VLD1LN32<bits<4> op11_8, bits<4> o
"$src = $Vd",
[(set DPR:$Vd, (vector_insert (Ty DPR:$src),
(i32 (LoadOp addrmode6oneL32:$Rn)),
- imm:$lane))]> {
+ imm:$lane))]>, Sched<[WriteVLD1]> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVLD1LN";
}
-class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
+class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>,
+ Sched<[WriteVLD1]> {
let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
(i32 (LoadOp addrmode6:$addr)),
imm:$lane))];
@@ -1117,7 +1119,7 @@ class VLD1LNWB<bits<4> op11_8, bits<4> o
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
"\\{$Vd[$lane]\\}, $Rn$Rm",
- "$src = $Vd, $Rn.addr = $wb", []> {
+ "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
let DecoderMethod = "DecodeVLD1LN";
}
@@ -1134,16 +1136,16 @@ def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,
let Inst{4} = Rn{4};
}
-def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
-def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
-def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
+def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
+def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
// VLD2LN : Vector Load (single 2-element structure to one lane)
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
(ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
- "$src1 = $Vd, $src2 = $dst2", []> {
+ "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2LN";
@@ -1159,9 +1161,9 @@ def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}
let Inst{7} = lane{0};
}
-def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
-def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
-def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
+def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
+def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
@@ -1171,8 +1173,8 @@ def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}
let Inst{7} = lane{0};
}
-def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
-def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
+def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1195,9 +1197,9 @@ def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,
let Inst{7} = lane{0};
}
-def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
-def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
-def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
+def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
+def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
@@ -1206,8 +1208,8 @@ def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,
let Inst{7} = lane{0};
}
-def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
-def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
+def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1215,7 +1217,7 @@ class VLD3LN<bits<4> op11_8, bits<4> op7
(ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
- "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVLD3LN";
}
@@ -1230,9 +1232,9 @@ def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}
let Inst{7} = lane{0};
}
-def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
-def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
-def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
+def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
+def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
@@ -1242,8 +1244,8 @@ def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}
let Inst{7} = lane{0};
}
-def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
-def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
+def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1254,7 +1256,7 @@ class VLD3LNWB<bits<4> op11_8, bits<4> o
IIC_VLD3lnu, "vld3", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
- []> {
+ []>, Sched<[WriteVLD2]> {
let DecoderMethod = "DecodeVLD3LN";
}
@@ -1268,9 +1270,9 @@ def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,
let Inst{7} = lane{0};
}
-def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
-def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
-def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
+def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
+def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
let Inst{7-6} = lane{1-0};
@@ -1279,8 +1281,8 @@ def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,
let Inst{7} = lane{0};
}
-def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
-def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
+def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1289,7 +1291,8 @@ class VLD4LN<bits<4> op11_8, bits<4> op7
(ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
- "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
+ Sched<[WriteVLD2]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD4LN";
@@ -1306,9 +1309,9 @@ def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}
let Inst{5} = Rn{5};
}
-def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
-def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
-def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
+def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
+def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
@@ -1319,8 +1322,8 @@ def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}
let Inst{5} = Rn{5};
}
-def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
-def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
+def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1347,9 +1350,9 @@ def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,
let Inst{5} = Rn{5};
}
-def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
-def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
-def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
+def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
+def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
@@ -1359,8 +1362,8 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,
let Inst{5} = Rn{5};
}
-def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
-def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
+def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
@@ -1371,7 +1374,8 @@ class VLD1DUP<bits<4> op7_4, string Dt,
(ins AddrMode:$Rn),
IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
[(set VecListOneDAllLanes:$Vd,
- (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
+ (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
+ Sched<[WriteVLD2]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
@@ -1434,7 +1438,7 @@ multiclass VLD1QDUPWB<bits<4> op7_4, str
(outs VecListDPairAllLanes:$Vd, GPR:$wb),
(ins AddrMode:$Rn), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
@@ -1491,7 +1495,7 @@ multiclass VLD2DUPWB<bits<4> op7_4, stri
(outs VdTy:$Vd, GPR:$wb),
(ins AddrMode:$Rn), IIC_VLD2dupu,
"vld2", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2DupInstruction";
@@ -1500,7 +1504,7 @@ multiclass VLD2DUPWB<bits<4> op7_4, stri
(outs VdTy:$Vd, GPR:$wb),
(ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
"vld2", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2DupInstruction";
}
@@ -1524,7 +1528,8 @@ defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?
class VLD3DUP<bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
(ins addrmode6dup:$Rn), IIC_VLD3dup,
- "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
+ "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
+ Sched<[WriteVLD2]> {
let Rm = 0b1111;
let Inst{4} = 0;
let DecoderMethod = "DecodeVLD3DupInstruction";
@@ -1534,9 +1539,9 @@ def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
-def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>;
-def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
-def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
+def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
+def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">;
@@ -1548,7 +1553,7 @@ class VLD3DUPWB<bits<4> op7_4, string Dt
: NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
(ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
"vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
let Inst{4} = 0;
let DecoderMethod = "DecodeVLD3DupInstruction";
}
@@ -1561,9 +1566,9 @@ def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;
-def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
-def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
-def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
+def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
+def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
// VLD4DUP : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
@@ -1580,9 +1585,9 @@ def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
-def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>;
-def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
-def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
+def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
+def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
// ...with double-spaced registers (not used for codegen):
def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">;
@@ -1595,7 +1600,7 @@ class VLD4DUPWB<bits<4> op7_4, string Dt
(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
(ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
"vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD4DupInstruction";
}
@@ -1608,9 +1613,9 @@ def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}
def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
-def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
-def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
-def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
+def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
+def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
@@ -1657,14 +1662,14 @@ class VSTQQQQWBPseudo<InstrItinClass iti
// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
- IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
+ IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
- IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
+ IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1685,7 +1690,7 @@ multiclass VST1DWB<bits<4> op7_4, string
def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
"vst1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1694,7 +1699,7 @@ multiclass VST1DWB<bits<4> op7_4, string
(ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
IIC_VLD1u,
"vst1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -1703,7 +1708,7 @@ multiclass VST1QWB<bits<4> op7_4, string
def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
"vst1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1712,7 +1717,7 @@ multiclass VST1QWB<bits<4> op7_4, string
(ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
IIC_VLD1x2u,
"vst1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -1732,7 +1737,7 @@ defm VST1q64wb : VST1QWB<{1,1,?,?}, "64"
class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
(ins AddrMode:$Rn, VecListThreeD:$Vd),
- IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
+ IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1741,7 +1746,7 @@ multiclass VST1D3WB<bits<4> op7_4, strin
def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
"vst1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1750,7 +1755,7 @@ multiclass VST1D3WB<bits<4> op7_4, strin
(ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
IIC_VLD1x3u,
"vst1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -1766,16 +1771,16 @@ defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "1
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;
-def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
-def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>;
-def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
+def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
+def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
(ins AddrMode:$Rn, VecListFourD:$Vd),
IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
- []> {
+ []>, Sched<[WriteVST4]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1784,7 +1789,7 @@ multiclass VST1D4WB<bits<4> op7_4, strin
def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
"vst1", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
@@ -1793,7 +1798,7 @@ multiclass VST1D4WB<bits<4> op7_4, strin
(ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
IIC_VLD1x4u,
"vst1", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
@@ -1809,9 +1814,9 @@ defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "1
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
-def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
-def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>;
-def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
+def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
+def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
// VST2 : Vector Store (multiple 2-element structures)
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
@@ -1824,22 +1829,22 @@ class VST2<bits<4> op11_8, bits<4> op7_4
}
def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVST2]>;
def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVST2]>;
def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
- addrmode6align64or128>;
+ addrmode6align64or128>, Sched<[WriteVST2]>;
def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVST4]>;
def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVST4]>;
def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
- addrmode6align64or128or256>;
+ addrmode6align64or128or256>, Sched<[WriteVST4]>;
-def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>;
-def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
-def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
+def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
+def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
// ...with address register writeback:
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
@@ -1847,7 +1852,7 @@ multiclass VST2DWB<bits<4> op11_8, bits<
def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
@@ -1855,7 +1860,7 @@ multiclass VST2DWB<bits<4> op11_8, bits<
def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
@@ -1864,7 +1869,7 @@ multiclass VST2QWB<bits<4> op7_4, string
def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
(ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn!",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
@@ -1873,7 +1878,7 @@ multiclass VST2QWB<bits<4> op7_4, string
(ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn, $Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
@@ -1890,12 +1895,12 @@ defm VST2q8wb : VST2QWB<{0,0,?,?}, "8
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;
-def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
-def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
-def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
-def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
-def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
-def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
+def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
+def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
// ...with double-spaced registers
def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2,
@@ -1915,7 +1920,7 @@ defm VST2b32wb : VST2DWB<0b1001, {1,0,?
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
- "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST3Instruction";
@@ -1925,9 +1930,9 @@ def VST3d8 : VST3D<0b0100, {0,0,0,?},
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;
-def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>;
-def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
-def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
+def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
+def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1935,7 +1940,7 @@ class VST3DWB<bits<4> op11_8, bits<4> op
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
"vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST3Instruction";
}
@@ -1944,9 +1949,9 @@ def VST3d8_UPD : VST3DWB<0b0100, {0,0,0
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
-def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
-def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
-def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
// ...with double-spaced registers:
def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">;
@@ -1956,25 +1961,25 @@ def VST3q8_UPD : VST3DWB<0b0101, {0,0,0
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
-def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
-def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
-def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
// ...alternate versions to be allocated odd register numbers:
-def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>;
-def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
-def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;
-
-def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
-def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
-def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
+def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
+def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
+
+def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
+def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
- "", []> {
+ "", []>, Sched<[WriteVST4]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST4Instruction";
@@ -1984,9 +1989,9 @@ def VST4d8 : VST4D<0b0000, {0,0,?,?},
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;
-def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>;
-def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
-def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
+def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
+def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1994,7 +1999,7 @@ class VST4DWB<bits<4> op11_8, bits<4> op
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
"vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
+ "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST4Instruction";
}
@@ -2003,9 +2008,9 @@ def VST4d8_UPD : VST4DWB<0b0000, {0,0,?
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
-def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
-def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
-def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
// ...with double-spaced registers:
def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">;
@@ -2015,18 +2020,18 @@ def VST4q8_UPD : VST4DWB<0b0001, {0,0,?
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
-def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
-def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
-def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
// ...alternate versions to be allocated odd register numbers:
-def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>;
-def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
-def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;
-
-def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
-def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
-def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
+def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
+def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
+
+def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
+def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
@@ -2060,12 +2065,13 @@ class VST1LN<bits<4> op11_8, bits<4> op7
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
(ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
- [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
+ [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
+ Sched<[WriteVST1]> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
- : VSTQLNPseudo<IIC_VST1ln> {
+ : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
addrmode6:$addr)];
}
@@ -2104,11 +2110,12 @@ class VST1LNWB<bits<4> op11_8, bits<4> o
"\\{$Vd[$lane]\\}, $Rn$Rm",
"$Rn.addr = $wb",
[(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
- AdrMode:$Rn, am6offset:$Rm))]> {
+ AdrMode:$Rn, am6offset:$Rm))]>,
+ Sched<[WriteVST1]> {
let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
- : VSTQLNWBPseudo<IIC_VST1lnu> {
+ : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
addrmode6:$addr, am6offset:$offset))];
}
@@ -2139,7 +2146,7 @@ class VST2LN<bits<4> op11_8, bits<4> op7
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
- "", []> {
+ "", []>, Sched<[WriteVST1]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVST2LN";
@@ -2155,9 +2162,9 @@ def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}
let Inst{7} = lane{0};
}
-def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>;
-def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
-def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
+def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
+def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
@@ -2169,8 +2176,8 @@ def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}
let Inst{4} = Rn{4};
}
-def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
-def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
+def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -2193,9 +2200,9 @@ def VST2LNd32_UPD : VST2LNWB<0b1001, {?,
let Inst{7} = lane{0};
}
-def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
-def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
-def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
+def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
+def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
@@ -2204,15 +2211,16 @@ def VST2LNq32_UPD : VST2LNWB<0b1001, {?,
let Inst{7} = lane{0};
}
-def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
-def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
+def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
- "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
+ Sched<[WriteVST2]> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVST3LN";
}
@@ -2227,9 +2235,9 @@ def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}
let Inst{7} = lane{0};
}
-def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
-def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
-def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
+def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
+def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
@@ -2263,9 +2271,9 @@ def VST3LNd32_UPD : VST3LNWB<0b1010, {?,
let Inst{7} = lane{0};
}
-def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
-def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
-def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
+def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
+def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
let Inst{7-6} = lane{1-0};
@@ -2274,8 +2282,8 @@ def VST3LNq32_UPD : VST3LNWB<0b1010, {?,
let Inst{7} = lane{0};
}
-def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
-def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
+def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -2283,7 +2291,7 @@ class VST4LN<bits<4> op11_8, bits<4> op7
(ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
"\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
- "", []> {
+ "", []>, Sched<[WriteVST2]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVST4LN";
@@ -2300,9 +2308,9 @@ def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}
let Inst{5} = Rn{5};
}
-def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
-def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
-def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
+def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
+def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
@@ -2313,8 +2321,8 @@ def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}
let Inst{5} = Rn{5};
}
-def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
-def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
+def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -2339,9 +2347,9 @@ def VST4LNd32_UPD : VST4LNWB<0b1011, {?,
let Inst{5} = Rn{5};
}
-def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
-def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
-def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
+def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
+def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
@@ -2351,8 +2359,8 @@ def VST4LNq32_UPD : VST4LNWB<0b1011, {?,
let Inst{5} = Rn{5};
}
-def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
-def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
+def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
Modified: llvm/trunk/lib/Target/ARM/ARMSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSchedule.td?rev=303717&r1=303716&r2=303717&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSchedule.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMSchedule.td Wed May 24 00:32:48 2017
@@ -131,6 +131,17 @@ def WriteFPDIV64 : SchedWrite;
def WriteFPSQRT32 : SchedWrite;
def WriteFPSQRT64 : SchedWrite;
+// Vector load and stores
+def WriteVLD1 : SchedWrite;
+def WriteVLD2 : SchedWrite;
+def WriteVLD3 : SchedWrite;
+def WriteVLD4 : SchedWrite;
+def WriteVST1 : SchedWrite;
+def WriteVST2 : SchedWrite;
+def WriteVST3 : SchedWrite;
+def WriteVST4 : SchedWrite;
+
+
// Define TII for use in SchedVariant Predicates.
def : PredicateProlog<[{
const ARMBaseInstrInfo *TII =
Modified: llvm/trunk/lib/Target/ARM/ARMScheduleA9.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleA9.td?rev=303717&r1=303716&r2=303717&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleA9.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleA9.td Wed May 24 00:32:48 2017
@@ -1981,6 +1981,15 @@ def A9WriteV7 : SchedWriteRes<[A9UnitFP,
def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; }
+def : WriteRes<WriteVLD1, []>;
+def : WriteRes<WriteVLD2, []>;
+def : WriteRes<WriteVLD3, []>;
+def : WriteRes<WriteVLD4, []>;
+def : WriteRes<WriteVST1, []>;
+def : WriteRes<WriteVST2, []>;
+def : WriteRes<WriteVST3, []>;
+def : WriteRes<WriteVST4, []>;
+
// Reserve A9UnitFP for 2 consecutive cycles.
def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
let Latency = 4;
Modified: llvm/trunk/lib/Target/ARM/ARMScheduleR52.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleR52.td?rev=303717&r1=303716&r2=303717&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleR52.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleR52.td Wed May 24 00:32:48 2017
@@ -120,6 +120,12 @@ def : WriteRes<WriteFPDIV64, [R52UnitDiv
def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; }
def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; }
+// Overriden via InstRW for this processor.
+def : WriteRes<WriteVST1, []>;
+def : WriteRes<WriteVST2, []>;
+def : WriteRes<WriteVST3, []>;
+def : WriteRes<WriteVST4, []>;
+
def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1
@@ -712,20 +718,20 @@ def R52WriteSTM : SchedWriteVariant<[
// Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
// another instruction in slot-1, but only in the last issue.
-def R52WriteVLD1Mem : SchedWriteRes<[R52UnitLd]> { let Latency = 5;}
-def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]> {
+def : WriteRes<WriteVLD1, [R52UnitLd]> { let Latency = 5;}
+def : WriteRes<WriteVLD2, [R52UnitLd]> {
let Latency = 6;
let NumMicroOps = 3;
let ResourceCycles = [2];
let SingleIssue = 1;
}
-def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> {
+def : WriteRes<WriteVLD3, [R52UnitLd]> {
let Latency = 7;
let NumMicroOps = 5;
let ResourceCycles = [3];
let SingleIssue = 1;
}
-def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> {
+def : WriteRes<WriteVLD4, [R52UnitLd]> {
let Latency = 8;
let NumMicroOps = 7;
let ResourceCycles = [4];
@@ -829,95 +835,6 @@ def : InstRW<[R52WriteFPALU_F4, R52Read_
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
//---
-// VLDx. Vector Loads
-//---
-// 1-element structure load
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)$")>;
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD1q(8|16|32|64)$")>;
-def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)T$")>;
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Q$")>;
-def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d64TPseudo$")>;
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d64QPseudo$")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)d(8|16|32)$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1LNdAsm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo$")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)wb")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1q(8|16|32|64)wb")>;
-def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Twb")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Qwb")>;
-def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64TPseudoWB")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64QPseudoWB")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNd(8|16|32)_UPD")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1DUP(d|q)(8|16|32)wb")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo_UPD")>;
-
-// 2-element structure load
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)$")>;
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)$")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)wb")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)wb")>;
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)Pseudo$")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)PseudoWB")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNdAsm_(8|16|32)$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNqAsm_(16|32)$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2$")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo")>;
-def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)_UPD")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)_UPD")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNqWB_(fixed|register)_Asm_(16|32)")>;
-
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)wb")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2wb")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo_UPD")>;
-def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo_UPD")>;
-
-// 3-element structure load
-def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)$")>;
-def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)Asm_(8|16|32)$")>;
-def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)_UPD")>;
-def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo")>;
-def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
-
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)$")>;
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)Asm_(8|16|32)$")>;
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
-
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
-
-// 4-element structure load
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)$")>;
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)Asm_(8|16|32)$")>;
-def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)_UPD")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
-
-
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$")>;
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)Asm_(8|16|32)$")>;
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4LN(d|q)(8|16|32)Pseudo$")>;
-def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4DUPd(8|16|32)Pseudo$")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)_UPD")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
-def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
-
-//---
// VSTx. Vector Stores
//---
// 1-element structure store
Modified: llvm/trunk/lib/Target/ARM/ARMScheduleSwift.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleSwift.td?rev=303717&r1=303716&r2=303717&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleSwift.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMScheduleSwift.td Wed May 24 00:32:48 2017
@@ -1070,6 +1070,16 @@ let SchedModel = SwiftModel in {
def : ReadAdvance<ReadFPMUL, 0>;
def : ReadAdvance<ReadFPMAC, 0>;
+ // Overriden via InstRW for this processor.
+ def : WriteRes<WriteVLD1, []>;
+ def : WriteRes<WriteVLD2, []>;
+ def : WriteRes<WriteVLD3, []>;
+ def : WriteRes<WriteVLD4, []>;
+ def : WriteRes<WriteVST1, []>;
+ def : WriteRes<WriteVST2, []>;
+ def : WriteRes<WriteVST3, []>;
+ def : WriteRes<WriteVST4, []>;
+
// Not specified.
def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
// Preload.
More information about the llvm-commits
mailing list