[llvm] r304108 - [AArch64][Falkor] Fix some sched details.
Geoff Berry via llvm-commits
llvm-commits at lists.llvm.org
Sun May 28 14:48:31 PDT 2017
Author: gberry
Date: Sun May 28 16:48:31 2017
New Revision: 304108
URL: http://llvm.org/viewvc/llvm-project?rev=304108&view=rev
Log:
[AArch64][Falkor] Fix some sched details.
- Remove all uses of base sched model entries and set them all to
Unsupported so all the opcodes are described in
AArch64SchedFalkorDetails.td.
- Remove entries for unsupported half-float opcodes.
- Remove entries for unsupported LSE extension opcodes.
- Add entry for MOVbaseTLS (and set Sched in base td file entry to
WriteSys) and a few other pseudo ops.
- Fix a few FP load/store with reg offset entries to use the LSLfast
predicates.
- Add Q size BIF/BIT/BSL entries.
- Fix swapped Q/D sized CLS/CLZ/CNT/RBIT entires.
- Fix pre/post increment address register latency (this operand is
always dest 0).
- Fix swapped FCVTHD/FCVTHS/FCVTDH/FCVTDS entries.
- Fix XYZ resource over usage on LD[1-4] opcodes.
Modified:
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
llvm/trunk/lib/Target/AArch64/AArch64SchedFalkor.td
llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorDetails.td
llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td?rev=304108&r1=304107&r2=304108&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td Sun May 28 16:48:31 2017
@@ -442,7 +442,7 @@ def MSRpstateImm4 : MSRpstateImm0_15;
// TPIDR_EL0. Add pseudo op so we can mark it as not having any side effects.
let hasSideEffects = 0 in
def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
- [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[]>;
+ [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;
// The cycle counter PMC register is PMCCNTR_EL0.
let Predicates = [HasPerfMon] in
Modified: llvm/trunk/lib/Target/AArch64/AArch64SchedFalkor.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64SchedFalkor.td?rev=304108&r1=304107&r2=304108&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64SchedFalkor.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64SchedFalkor.td Sun May 28 16:48:31 2017
@@ -61,56 +61,42 @@ let SchedModel = FalkorModel in {
let SchedModel = FalkorModel in {
-def : WriteRes<WriteImm, [FalkorUnitXYZ]> { let Latency = 1; }
-def : WriteRes<WriteI, [FalkorUnitXYZ]> { let Latency = 1; }
-def : WriteRes<WriteISReg, [FalkorUnitVXVY, FalkorUnitVXVY]>
- { let Latency = 1; let NumMicroOps = 2; }
-def : WriteRes<WriteIEReg, [FalkorUnitXYZ, FalkorUnitXYZ]>
- { let Latency = 2; let NumMicroOps = 2; }
-def : WriteRes<WriteExtr, [FalkorUnitXYZ, FalkorUnitXYZ]>
- { let Latency = 2; let NumMicroOps = 2; }
-def : WriteRes<WriteIS, [FalkorUnitXYZ]> { let Latency = 1; }
-def : WriteRes<WriteID32, [FalkorUnitX, FalkorUnitZ]>
- { let Latency = 8; let NumMicroOps = 2; }
-def : WriteRes<WriteID64, [FalkorUnitX, FalkorUnitZ]>
- { let Latency = 16; let NumMicroOps = 2; }
-def : WriteRes<WriteIM32, [FalkorUnitX]> { let Latency = 4; }
-def : WriteRes<WriteIM64, [FalkorUnitX]> { let Latency = 5; }
-def : WriteRes<WriteBr, [FalkorUnitB]> { let Latency = 1; }
-def : WriteRes<WriteBrReg, [FalkorUnitB]> { let Latency = 1; }
-def : WriteRes<WriteLD, [FalkorUnitLD]> { let Latency = 3; }
-def : WriteRes<WriteST, [FalkorUnitST, FalkorUnitSD]>
- { let Latency = 0; let NumMicroOps = 2; }
-def : WriteRes<WriteSTP, [FalkorUnitST, FalkorUnitSD]>
- { let Latency = 0; let NumMicroOps = 2; }
-def : WriteRes<WriteAdr, [FalkorUnitXYZ]> { let Latency = 1; }
-def : WriteRes<WriteLDIdx, [FalkorUnitLD]> { let Latency = 5; }
-def : WriteRes<WriteSTIdx, [FalkorUnitST, FalkorUnitSD]>
- { let Latency = 0; let NumMicroOps = 2; }
-def : WriteRes<WriteF, [FalkorUnitVXVY, FalkorUnitVXVY]>
- { let Latency = 3; let NumMicroOps = 2; }
-def : WriteRes<WriteFCmp, [FalkorUnitVXVY]> { let Latency = 2; }
-def : WriteRes<WriteFCvt, [FalkorUnitVXVY]> { let Latency = 4; }
-def : WriteRes<WriteFCopy, [FalkorUnitVXVY]> { let Latency = 4; }
-def : WriteRes<WriteFImm, [FalkorUnitVXVY]> { let Latency = 4; }
-def : WriteRes<WriteFMul, [FalkorUnitVXVY, FalkorUnitVXVY]>
- { let Latency = 6; let NumMicroOps = 2; }
-def : WriteRes<WriteFDiv, [FalkorUnitVXVY, FalkorUnitVXVY]>
- { let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1
-def : WriteRes<WriteV, [FalkorUnitVXVY]> { let Latency = 6; }
-def : WriteRes<WriteVLD, [FalkorUnitLD]> { let Latency = 3; }
-def : WriteRes<WriteVST, [FalkorUnitST, FalkorUnitVSD]>
- { let Latency = 0; let NumMicroOps = 2; }
+// These WriteRes entries are not used in the Falkor sched model.
+def : WriteRes<WriteImm, []> { let Unsupported = 1; }
+def : WriteRes<WriteI, []> { let Unsupported = 1; }
+def : WriteRes<WriteISReg, []> { let Unsupported = 1; }
+def : WriteRes<WriteIEReg, []> { let Unsupported = 1; }
+def : WriteRes<WriteExtr, []> { let Unsupported = 1; }
+def : WriteRes<WriteIS, []> { let Unsupported = 1; }
+def : WriteRes<WriteID32, []> { let Unsupported = 1; }
+def : WriteRes<WriteID64, []> { let Unsupported = 1; }
+def : WriteRes<WriteIM32, []> { let Unsupported = 1; }
+def : WriteRes<WriteIM64, []> { let Unsupported = 1; }
+def : WriteRes<WriteBr, []> { let Unsupported = 1; }
+def : WriteRes<WriteBrReg, []> { let Unsupported = 1; }
+def : WriteRes<WriteLD, []> { let Unsupported = 1; }
+def : WriteRes<WriteST, []> { let Unsupported = 1; }
+def : WriteRes<WriteSTP, []> { let Unsupported = 1; }
+def : WriteRes<WriteAdr, []> { let Unsupported = 1; }
+def : WriteRes<WriteLDIdx, []> { let Unsupported = 1; }
+def : WriteRes<WriteSTIdx, []> { let Unsupported = 1; }
+def : WriteRes<WriteF, []> { let Unsupported = 1; }
+def : WriteRes<WriteFCmp, []> { let Unsupported = 1; }
+def : WriteRes<WriteFCvt, []> { let Unsupported = 1; }
+def : WriteRes<WriteFCopy, []> { let Unsupported = 1; }
+def : WriteRes<WriteFImm, []> { let Unsupported = 1; }
+def : WriteRes<WriteFMul, []> { let Unsupported = 1; }
+def : WriteRes<WriteFDiv, []> { let Unsupported = 1; }
+def : WriteRes<WriteV, []> { let Unsupported = 1; }
+def : WriteRes<WriteVLD, []> { let Unsupported = 1; }
+def : WriteRes<WriteVST, []> { let Unsupported = 1; }
+def : WriteRes<WriteSys, []> { let Unsupported = 1; }
+def : WriteRes<WriteBarrier, []> { let Unsupported = 1; }
+def : WriteRes<WriteHint, []> { let Unsupported = 1; }
+def : WriteRes<WriteLDHi, []> { let Unsupported = 1; }
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
-def : WriteRes<WriteSys, []> { let Latency = 1; }
-def : WriteRes<WriteBarrier, []> { let Latency = 1; }
-def : WriteRes<WriteHint, []> { let Latency = 1; }
-
-def : WriteRes<WriteLDHi, []> { let Latency = 3; }
-
-def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
-
-// No forwarding logic is modelled yet.
+// These ReadAdvance entries are not used in the Falkor sched model.
def : ReadAdvance<ReadI, 0>;
def : ReadAdvance<ReadISReg, 0>;
def : ReadAdvance<ReadIEReg, 0>;
Modified: llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorDetails.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorDetails.td?rev=304108&r1=304107&r2=304108&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorDetails.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorDetails.td Sun May 28 16:48:31 2017
@@ -22,63 +22,76 @@ include "AArch64SchedFalkorWriteRes.td"
// Miscellaneous
// -----------------------------------------------------------------------------
-def : InstRW<[WriteI], (instrs COPY)>;
+// FIXME: This could be better modeled by looking at the regclasses of the operands.
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs COPY)>;
// SIMD Floating-point Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f32|v4f16)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)v2f32$")>;
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v4f16|v2i16p|v2i32p)$")>;
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FAC(GE|GT)(16|32|64)$")>;
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|GE|GT)(16|32|64|v2f32|v4f16|v2i32|v4i16)$")>;
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i16|v1i32|v1i64|v2i32|v4i16)rz$")>;
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f32|v4f16)$")>;
-
-def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^F(MAX|MIN)(NM)?V(v4i16|v4i32|v8i16)v$")>;
-def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FABD|FADD|FSUB)(v2f32|v4f16)$")>;
-def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i16p|v2i32p|v2i64p|v2f32|v4f16)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v2i32p)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FAC(GE|GT)(32|64)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|GE|GT)(32|64|v2f32|v2i32)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i32|v1i64|v2i32)rz$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)v2f32$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^F(MAX|MIN)(NM)?Vv4i32v$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FABD|FADD|FSUB)v2f32$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i32p|v2i64p|v2f32)$")>;
-def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v2f32)$")>;
def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTXNv1i64)>;
-def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i32|v4i16)(_shift)?$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)v2i32(_shift)?$")>;
-def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], (instregex "^(FMUL|FMULX)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>;
-def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], (instrs FMULX16, FMULX32)>;
+def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
+ (instregex "^(FMUL|FMULX)(v2f32|(v1i32_indexed|v2i32_indexed))$")>;
+def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
+ (instrs FMULX32)>;
-def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], (instregex "^(FMUL|FMULX)v1i64_indexed$")>;
-def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], (instrs FMULX64)>;
+def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
+ (instregex "^(FMUL|FMULX)v1i64_indexed$")>;
+def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
+ (instrs FMULX64)>;
-def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32|v8f16)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32)$")>;
-def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v8f16|v2i64p)$")>;
-def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32|v8i16)rz$")>;
-def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32|v8f16)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v2i64p)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32)rz$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs FCVTLv4i16, FCVTLv2i32)>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)$")>;
-def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^(FDIV|FSQRT)(v2f32|v4f16)$")>;
+def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^(FDIV|FSQRT)v2f32$")>;
-def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32|v8f16)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32)$")>;
-def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32|v8f16)$")>;
-def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(FCVTL|FCVTL2)(v2i32|v4i16|v4i32|v8i16)$")>;
-def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32|v8i16)(_shift)?$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32)$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs FCVTLv8i16, FCVTLv4i32)>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32)(_shift)?$")>;
-def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], (instregex "^(FMUL|FMULX)(v2f64|v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>;
+def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc],
+ (instregex "^(FMUL|FMULX)(v2f64|v4f32|v4i32_indexed)$")>;
-def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], (instregex "^(FMUL|FMULX)v2i64_indexed$")>;
+def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc],
+ (instregex "^(FMUL|FMULX)v2i64_indexed$")>;
-def : InstRW<[FalkorWr_3VXVY_4cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>;
+def : InstRW<[FalkorWr_3VXVY_4cyc], (instrs FCVTNv4i16, FCVTNv2i32, FCVTXNv2f32)>;
+def : InstRW<[FalkorWr_3VXVY_5cyc], (instrs FCVTNv8i16, FCVTNv4i32, FCVTXNv4f32)>;
-def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v2i32|v4i16|v4i32|v8i16|v4f32)$")>;
+def : InstRW<[FalkorWr_2VX_2VY_2cyc], (instregex "^(FDIV|FSQRT)(v2f64|v4f32)$")>;
-def : InstRW<[FalkorWr_2VX_2VY_2cyc], (instregex "^(FDIV|FSQRT)(v2f64|v4f32|v8f16)$")>;
+def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
+ (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>;
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
+ (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>;
-def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>;
-def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>;
-
-def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, FalkorReadFMA32], (instregex "^FML(A|S)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>;
-def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, FalkorReadFMA64], (instregex "^FML(A|S)v1i64_indexed$")>;
-def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc, FalkorReadFMA32], (instregex "^FML(A|S)(v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>;
-def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc, FalkorReadFMA64], (instregex "^FML(A|S)(v2f64|v2i64_indexed)$")>;
+def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, FalkorReadFMA32],
+ (instregex "^FML(A|S)(v2f32|(v1i32_indexed|v2i32_indexed))$")>;
+def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, FalkorReadFMA64],
+ (instregex "^FML(A|S)v1i64_indexed$")>;
+def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc, FalkorReadFMA32],
+ (instregex "^FML(A|S)(v4f32|v4i32_indexed)$")>;
+def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc, FalkorReadFMA64],
+ (instregex "^FML(A|S)(v2f64|v2i64_indexed)$")>;
// SIMD Integer Instructions
// -----------------------------------------------------------------------------
@@ -92,12 +105,14 @@ def : InstRW<[FalkorWr_1VXVY_1cyc], (i
def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v2i32|v4i16|v8i8)(_v.*)?$")>;
def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHLv1i64$")>;
def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHR(v2i32|v4i16|v8i8)_shift$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHRd$")>;
def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS|ADDP|CM(EQ|GE|HS|GT|HI))(v1i64|v2i32|v4i16|v8i8)$")>;
def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v1i64|v2i32|v4i16|v8i8)$")>;
def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v1i64|v2i32|v4i16|v8i8)rz$")>;
def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CMTST(v1i64|v2i32|v4i16|v8i8)$")>;
def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs PMULv8i8)>;
def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHL(v2i32|v4i16|v8i8)_shift$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHLd$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v2i32|v4i16|v8i8)$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)$")>;
@@ -110,6 +125,8 @@ def : InstRW<[FalkorWr_1VXVY_3cyc], (i
def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSUB(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RHADD(v2i32|v4i16|v8i8)$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHR(v2i32|v4i16|v8i8)_shift$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHRd$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^R?SHRN(v2i32|v4i16|v8i8)_shift$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)?(MAX|MIN)V(v4i16v|v4i32v)$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs ADDVv4i16v)>;
@@ -120,10 +137,14 @@ def : InstRW<[FalkorWr_1VXVY_3cyc], (i
def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)ADDLVv8i8v$")>;
def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v)$")>;
def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs ADDVv8i8v)>;
-def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>;
-def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
-def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], (instregex "^SQDMULL(i16|i32)$")>;
-def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], (instregex "^SQRDML(A|S)H(i16|i32|v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
+def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
+ (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>;
+def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
+ (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
+def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
+ (instregex "^SQDMULL(i16|i32)$")>;
+def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
+ (instregex "^SQRDML(A|S)H(i16|i32|v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>;
@@ -154,7 +175,7 @@ def : InstRW<[FalkorWr_2VXVY_2cyc], (i
def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v16i8|v2i64|v4i32|v8i16)$")>;
def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v16i8|v2i64|v4i32|v8i16)rz$")>;
def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CMTST|PMUL)(v16i8|v2i64|v4i32|v8i16)$")>;
-def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^PMULL2?(v8i8|v16i8)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^PMULL(v8i8|v16i8)$")>;
def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHL(v16i8|v8i16|v4i32|v2i64)_shift$")>;
def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
@@ -165,14 +186,18 @@ def : InstRW<[FalkorWr_2VXVY_3cyc], (i
def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)QSHLU?(v2i64|v4i32|v8i16|v16i8)_shift$")>;
def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)$")>;
def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)RSHR(v2i64|v4i32|v8i16|v16i8)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^R?SHRN(v2i64|v4i32|v8i16|v16i8)_shift$")>;
def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)$")>;
-def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^PMULL2?(v1i64|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^PMULL(v1i64|v2i64)$")>;
def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift$")>;
def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>;
-def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
-def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], (instregex "^SQDMULLv.*$")>;
-def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>;
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
+ (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
+ (instregex "^SQDMULLv.*$")>;
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
+ (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>;
def : InstRW<[FalkorWr_3VXVY_3cyc], (instregex "^(S|U)ADDLVv4i32v$")>;
@@ -186,83 +211,91 @@ def : InstRW<[FalkorWr_4VXVY_3cyc], (i
def : InstRW<[FalkorWr_4VXVY_4cyc], (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>;
-def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], (instregex "^SQD(MLAL|MLSL)(i16|i32|v1i32_indexed|v1i64_indexed)$")>;
-def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], (instregex "^SQD(MLAL|MLSL)v[248].*$")>;
+def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
+ (instregex "^SQD(MLAL|MLSL)(i16|i32|v1i32_indexed|v1i64_indexed)$")>;
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
+ (instregex "^SQD(MLAL|MLSL)v[248].*$")>;
// SIMD Load Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[WriteVLD], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>;
-def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVLD], (instrs LD2i64)>;
-def : InstRW<[WriteVLD, WriteAdr], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>;
-def : InstRW<[WriteVLD, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[WriteVLD, WriteAdr], (instrs LD2i64_POST)>;
-
-def : InstRW<[FalkorWr_1LD_1VXVY_4cyc], (instregex "LD1i(8|16|32)$")>;
-def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, WriteAdr], (instregex "LD1i(8|16|32)_POST$")>;
-
-def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
-
-def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD3i64)>;
-def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD4i64)>;
-def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instrs LD3i64_POST)>;
-def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instrs LD4i64_POST)>;
-
-def : InstRW<[FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)$")>;
-def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, WriteAdr], (instregex "^LD2i(8|16|32)_POST$")>;
-
-def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_2LD_1none_3cyc, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[FalkorWr_2LD_1none_3cyc, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
-
-def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_3LD_3cyc], (instrs LD3Threev2d)>;
-def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instrs LD3Threev2d_POST)>;
-def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[FalkorWr_1LD_3VXVY_4cyc], (instregex "LD3i(8|16|32)$")>;
-def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, WriteAdr], (instregex "LD3i(8|16|32)_POST$")>;
-
-def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_2LD_2none_3cyc, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[FalkorWr_2LD_2none_3cyc, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
-
-def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_4LD_3cyc], (instrs LD4Fourv2d)>;
-def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instrs LD4Fourv2d_POST)>;
-def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)$")>;
-def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, WriteAdr], (instregex "^LD4i(8|16|32)_POST$")>;
-
-def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc], (instregex "LD3Threev(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, WriteAdr],(instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instrs LD2i64)>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], (instrs LD2i64_POST)>;
+
+def : InstRW<[FalkorWr_1LD_1VXVY_4cyc], (instregex "^LD1i(8|16|32)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1VXVY_4cyc], (instregex "^LD1i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD3i64)>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instrs LD3i64_POST)>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD4i64)>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instrs LD4i64_POST)>;
+
+def : InstRW<[FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_3LD_3cyc], (instrs LD3Threev2d)>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc], (instrs LD3Threev2d_POST)>;
+def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_3VXVY_4cyc], (instregex "^LD3i(8|16|32)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3VXVY_4cyc], (instregex "^LD3i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_4LD_3cyc], (instrs LD4Fourv2d)>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc], (instrs LD4Fourv2d_POST)>;
+def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc], (instregex "^LD3Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_1none_4cyc],
+ (instregex "^LD3Threev(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc], (instregex "^LD4Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_2none_4cyc],
+ (instregex "^LD4Fourv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD3Threev(16b|8h|4s)$")>;
-def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc], (instregex "^LD4Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, WriteAdr],(instregex "^LD4Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD4Fourv(16b|8h|4s)$")>;
-def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "LD3Threev(16b|8h|4s)$")>;
-def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD4Fourv(16b|8h|4s)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc],
+ (instregex "^LD3Threev(16b|8h|4s)_POST$")>;
-def : InstRW<[FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, WriteAdr],(instregex "LD3Threev(16b|8h|4s)_POST$")>;
-def : InstRW<[FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, WriteAdr],(instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc],
+ (instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
// Arithmetic and Logical Instructions
// -----------------------------------------------------------------------------
@@ -285,6 +318,7 @@ def : InstRW<[FalkorWr_ADDSUBsx], (i
// -----------------------------------------------------------------------------
def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>;
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^CPY(i8|i16|i32|i64)$")>;
def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>;
def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>;
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v8i8$")>;
@@ -293,35 +327,42 @@ def : InstRW<[FalkorWr_1VXVY_1cyc], (i
def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>;
def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs NOTv8i8)>;
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^REV(16|32|64)v.*$")>;
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN|XTN2)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>;
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v2i32|v4i16|v8i8)$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "(S|U)QXTU?Nv.*$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64, FRECPEv2f32, FRSQRTEv2f32)>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPXv1i32, FRECPXv1i64)>;
def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs URECPEv2i32, URSQRTEv2i32)>;
-def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>;
+def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
+ (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>;
-def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], (instrs FRECPS64, FRSQRTS64)>;
+def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
+ (instrs FRECPS64, FRSQRTS64)>;
-def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],(instregex "^INSv(i32|i64)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],
+ (instregex "^INSv(i32|i64)(gpr|lane)$")>;
def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v16i8$")>;
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>;
def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>;
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>;
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs TBLv16i8One)>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>;
def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>;
def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs URECPEv4i32, URSQRTEv4i32)>;
def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs TBLv8i8Two)>;
def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^TBX(v8|v16)i8One$")>;
-def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], (instrs FRECPSv4f32, FRSQRTSv4f32)>;
+def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc],
+ (instrs FRECPSv4f32, FRSQRTSv4f32)>;
-def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], (instrs FRECPSv2f64, FRSQRTSv2f64)>;
+def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc],
+ (instrs FRECPSv2f64, FRSQRTSv2f64)>;
def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBL(v8i8Three|v16i8Two)$")>;
def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBX(v8i8Two|v16i8Two)$")>;
@@ -335,54 +376,94 @@ def : InstRW<[FalkorWr_5VXVY_7cyc], (i
// SIMD Store Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[WriteVST], (instregex "^STR(Q|D|S|H|B)ui$")>;
-def : InstRW<[WriteVST, WriteVST], (instregex "^STPQi$")>;
-def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^STPQ(post|pre)$")>;
-def : InstRW<[WriteVST], (instregex "^STP(D|S)(i)$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "^STP(D|S)(post|pre)$")>;
-def : InstRW<[FalkorWr_STRVro], (instregex "^STR(D|S|H|B)ro(W|X)$")>;
-def : InstRW<[FalkorWr_STRQro], (instregex "^STRQro(W|X)$")>;
-
-def : InstRW<[WriteVST], (instregex "^ST1(One(v8b|v4h|v2s|v1d)(_POST)?|(i8|i16|i32|i64)(_POST)?|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>;
-def : InstRW<[WriteVST], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>;
-
-def : InstRW<[WriteVST, WriteVST], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>;
-def : InstRW<[WriteVST, WriteVST], (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>;
-def : InstRW<[WriteVST, WriteVST], (instregex "^ST3(i8|i16|i32|i64)$")>;
-def : InstRW<[WriteVST, WriteVST], (instregex "^ST4(i8|i16|i32|i64)$")>;
-def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>;
-def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>;
-def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST3(i8|i16|i32|i64)_POST$")>;
-def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST4(i8|i16|i32|i64)_POST$")>;
-
-def : InstRW<[WriteV, WriteVST, WriteVST], (instregex "^ST3Three(v8b|v4h|v2s|v1d)$")>;
-def : InstRW<[WriteV, WriteVST, WriteVST, WriteAdr], (instregex "^ST3Three(v8b|v4h|v2s|v1d)_POST$")>;
-
-def : InstRW<[WriteVST, WriteVST, WriteVST], (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>;
-def : InstRW<[WriteVST, WriteVST, WriteVST], (instrs ST3Threev2d)>;
-def : InstRW<[WriteVST, WriteVST, WriteVST, WriteAdr], (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>;
-def : InstRW<[WriteVST, WriteVST, WriteVST, WriteAdr], (instrs ST3Threev2d_POST)>;
-
-def : InstRW<[WriteV, WriteV, WriteVST, WriteVST], (instregex "^ST4Four(v8b|v4h|v2s|v1d)$")>;
-def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteAdr], (instregex "^ST4Four(v8b|v4h|v2s|v1d)_POST$")>;
-
-def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>;
-def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], (instrs ST4Fourv2d)>;
-def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr], (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>;
-def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr], (instrs ST4Fourv2d_POST)>;
-
-def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST3Three(v16b|v8h|v4s)$")>;
-def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],(instregex "^ST3Three(v16b|v8h|v4s)_POST$")>;
-
-def : InstRW<[WriteV, WriteV, WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST4Four(v16b|v8h|v4s)$")>;
-def : InstRW<[WriteV, WriteV, WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],(instregex "^ST4Four(v16b|v8h|v4s)_POST$")>;
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STR(Q|D|S|H|B)ui$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc],
+ (instregex "^STR(Q|D|S|H|B)(post|pre)$")>;
+def : InstRW<[FalkorWr_STRVro], (instregex "^STR(D|S|H|B)ro(W|X)$")>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^STPQi$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2VSD_2ST_0cyc],
+ (instregex "^STPQ(post|pre)$")>;
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STP(D|S)(i)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc],
+ (instregex "^STP(D|S)(post|pre)$")>;
+def : InstRW<[FalkorWr_STRQro], (instregex "^STRQro(W|X)$")>;
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STUR(Q|D|S|B|H)i$")>;
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instrs STNPDi, STNPSi)>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instrs STNPQi)>;
+
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^ST1(One(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64)|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc],
+ (instregex "^ST1(One(v8b|v4h|v2s|v1d)_POST|(i8|i16|i32|i64)_POST)$")>;
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc],
+ (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc],
+ (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>;
+
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST3(i8|i16|i32|i64)$")>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST4(i8|i16|i32|i64)$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc],
+ (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc],
+ (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc],
+ (instregex "^ST3(i8|i16|i32|i64)_POST$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc],
+ (instregex "^ST4(i8|i16|i32|i64)_POST$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2ST_2VSD_0cyc],
+ (instregex "^ST3Three(v8b|v4h|v2s|v1d)$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VXVY_2ST_2VSD_0cyc],
+ (instregex "^ST3Three(v8b|v4h|v2s|v1d)_POST$")>;
+
+def : InstRW<[FalkorWr_3VSD_3ST_0cyc], (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[FalkorWr_3VSD_3ST_0cyc], (instrs ST3Threev2d)>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc],
+ (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc],
+ (instrs ST3Threev2d_POST)>;
+
+def : InstRW<[FalkorWr_2VXVY_2ST_2VSD_0cyc],
+ (instregex "^ST4Four(v8b|v4h|v2s|v1d)$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_2ST_2VSD_0cyc],
+ (instregex "^ST4Four(v8b|v4h|v2s|v1d)_POST$")>;
+
+def : InstRW<[FalkorWr_4VSD_4ST_0cyc], (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[FalkorWr_4VSD_4ST_0cyc], (instrs ST4Fourv2d)>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc],
+ (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc],
+ (instrs ST4Fourv2d_POST)>;
+
+def : InstRW<[FalkorWr_2VXVY_4ST_4VSD_0cyc],
+ (instregex "^ST3Three(v16b|v8h|v4s)$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_4ST_4VSD_0cyc],
+ (instregex "^ST3Three(v16b|v8h|v4s)_POST$")>;
+
+def : InstRW<[FalkorWr_4VXVY_4ST_4VSD_0cyc],
+ (instregex "^ST4Four(v16b|v8h|v4s)$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VXVY_4ST_4VSD_0cyc],
+ (instregex "^ST4Four(v16b|v8h|v4s)_POST$")>;
// Branch Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[FalkorWr_1none_0cyc], (instrs B)>;
+def : InstRW<[FalkorWr_1none_0cyc], (instrs B, TCRETURNdi)>;
def : InstRW<[FalkorWr_1Z_0cyc], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ)(W|X))$")>;
+def : InstRW<[FalkorWr_1Z_0cyc], (instrs RET_ReallyLR, TCRETURNri)>;
def : InstRW<[FalkorWr_1ZB_0cyc], (instrs Bcc)>;
def : InstRW<[FalkorWr_1XYZB_0cyc], (instrs BL)>;
def : InstRW<[FalkorWr_1Z_1XY_0cyc], (instrs BLR)>;
@@ -399,89 +480,103 @@ def : InstRW<[FalkorWr_4VXVY_3cyc], (i
// FP Load Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[WriteLD], (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>;
-def : InstRW<[WriteLD, WriteAdr], (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>;
-def : InstRW<[WriteLD], (instregex "^LDUR(Q|D|S|H|B)i$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc],
+ (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(Q|D|S|H|B)i$")>;
def : InstRW<[FalkorWr_LDRro], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
-def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDNPQi)>;
-def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDPQi)>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi],(instregex "LDNP(D|S)i$")>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi],(instregex "LDP(D|S)i$")>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi, WriteAdr],(instregex "LDP(D|S)(pre|post)$")>;
-def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi, WriteAdr],(instregex "^LDPQ(pre|post)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc],
+ (instrs LDNPQi)>;
+def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc],
+ (instrs LDPQi)>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc],
+ (instregex "LDNP(D|S)i$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc],
+ (instregex "LDP(D|S)i$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc],
+ (instregex "LDP(D|S)(pre|post)$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc, FalkorWr_none_3cyc],
+ (instregex "^LDPQ(pre|post)$")>;
// FP Data Processing Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCCMP(E)?(H|S|D)rr$")>;
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCMP(E)?(H|S|D)r(r|i)$")>;
-def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVT(A|M|N|P)(S|U)U(W|X)(H|S|D)r$")>;
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(H|S|D)r$")>;
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCSEL(H|S|D)rrr$")>;
-
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?(H|S|D)rr$")>;
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?Pv2i(16|32|64)p$")>;
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs FCVTHSr, FCVTHDr)>;
-def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(H|S|D)r$")>;
-
-def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FABD(16|32|64)$")>;
-def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FADD|FSUB)(H|S|D)rr$")>;
-def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FCVTSHr, FCVTDHr)>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCCMP(E)?(S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCMP(E)?(S|D)r(r|i)$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(S|D)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCSEL(S|D)rrr$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?(S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?Pv2i(32|64)p$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs FCVTSHr, FCVTDHr)>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(S|D)r$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FABD(32|64)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FADD|FSUB)(S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FCVTHSr, FCVTHDr)>;
def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTSDr, FCVTDSr)>;
-def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], (instregex "^F(N)?MUL(H|S)rr$")>;
+def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
+ (instregex "^F(N)?MULSrr$")>;
-def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], (instregex "^F(N)?MULDrr$")>;
+def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
+ (instregex "^F(N)?MULDrr$")>;
-def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^FDIV(H|S|D)rr$")>;
-def : InstRW<[FalkorWr_1VX_1VY_2cyc], (instregex "^FSQRT(H|S|D)r$")>;
+def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^FDIV(S|D)rr$")>;
+def : InstRW<[FalkorWr_1VX_1VY_2cyc], (instregex "^FSQRT(S|D)r$")>;
-def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32], (instregex "^F(N)?M(ADD|SUB)(H|S)rrr$")>;
-def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFMA64], (instregex "^F(N)?M(ADD|SUB)Drrr$")>;
+def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32],
+ (instregex "^F(N)?M(ADD|SUB)Srrr$")>;
+def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFMA64],
+ (instregex "^F(N)?M(ADD|SUB)Drrr$")>;
// FP Miscellaneous Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(WH|WS|XH|XD|XDHigh)r$")>;
-def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^FMOV(H|S|D)i$")>;
-def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(S|U)(W|X)(D|S)ri?$")>;
-def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(HW|HX|SW|DX|DXHigh)r$")>;
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FMOV(Hr|Sr|Dr|v.*_ns)$")>;
+def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(WS|XD|XDHigh)r$")>;
+def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^FMOV(S|D)i$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)S(W|X)(D|S)ri$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(d|s)$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(SW|DX|DXHigh)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FMOV(Sr|Dr|v.*_ns)$")>;
// FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov wzr/xzr
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs FMOVD0, FMOVS0)>;
def : InstRW<[FalkorWr_1GTOV_4cyc], (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>;
-def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i16|v1i32|v2i32|v1i64|v4i16|v2f32|v4f16|d|s)(_shift)?")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>;
-def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v8i16|v2f64|v4f32|v8f16)(_shift)?")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v2f64|v4f32)(_shift)?")>;
// Load Instructions
// -----------------------------------------------------------------------------
def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>;
def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFUMi)>;
-
-def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDNP(W|X)i$")>;
-def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDP(W|X)i$")>;
-def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(B|H|W|X)ui$")>;
-def : InstRW<[WriteLD, WriteAdr], (instregex "^LDR(B|H|W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc],
+ (instregex "^LDNP(W|X)i$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc],
+ (instregex "^LDP(W|X)i$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc, FalkorWr_none_3cyc],
+ (instregex "^LDP(W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(BB|HH|W|X)ui$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc],
+ (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_LDRro], (instregex "^LDR(BB|HH|W|X)ro(W|X)$")>;
def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(W|X)l$")>;
def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDTR(B|H|W|X)i$")>;
-def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(B|H|W|X)i$")>;
-
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(BB|HH|W|X)i$")>;
+def : InstRW<[FalkorWr_PRFMro], (instregex "^PRFMro(W|X)$")>;
+def : InstRW<[FalkorWr_1LD_4cyc, FalkorWr_none_4cyc],
+ (instrs LDPSWi)>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4cyc, FalkorWr_none_4cyc],
+ (instregex "^LDPSW(post|pre)$")>;
def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4cyc],
+ (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
+def : InstRW<[FalkorWr_LDRSro], (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
def : InstRW<[FalkorWr_1LD_4cyc], (instrs LDRSWl)>;
def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
-def : InstRW<[FalkorWr_PRFMro], (instregex "^PRFMro(W|X)$")>;
-def : InstRW<[FalkorWr_LDRro], (instregex "^LDR(B|H|W|X)ro(W|X)$")>;
-
-def : InstRW<[FalkorWr_LDRSro], (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
-
-def : InstRW<[FalkorWr_1LD_4cyc, WriteAdr],(instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
-def : InstRW<[WriteLD, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>;
-def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi],(instrs LDPSWi)>;
-def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi, WriteAdr],(instregex "^LDPSW(post|pre)$")>;
-
// Miscellaneous Data-Processing Instructions
// -----------------------------------------------------------------------------
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(S|U)?BFM(W|X)ri$")>;
@@ -491,17 +586,22 @@ def : InstRW<[FalkorWr_2XYZ_2cyc], (i
// Divide and Multiply Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[FalkorWr_IMUL64_1X_4cyc, ReadDefault, ReadDefault, FalkorReadIMA64], (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
-def : InstRW<[FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32], (instregex "^M(ADD|SUB)Wrrr$")>;
-
-def : InstRW<[FalkorWr_IMUL64_1X_5cyc], (instregex "^(S|U)MULHrr$")>;
-def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64], (instregex "^M(ADD|SUB)Xrrr$")>;
-
-def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>;
-def : InstRW<[FalkorWr_1X_1Z_16cyc], (instregex "^(S|U)DIVXr$")>;
-
-def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], (instregex "^(S|U)MULLv.*$")>;
-def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], (instregex "^(S|U)(MLAL|MLSL)v.*$")>;
+def : InstRW<[FalkorWr_IMUL64_1X_4cyc, ReadDefault, ReadDefault, FalkorReadIMA64],
+ (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
+def : InstRW<[FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32],
+ (instregex "^M(ADD|SUB)Wrrr$")>;
+
+def : InstRW<[FalkorWr_IMUL64_1X_5cyc], (instregex "^(S|U)MULHrr$")>;
+def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64],
+ (instregex "^M(ADD|SUB)Xrrr$")>;
+
+def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>;
+def : InstRW<[FalkorWr_1X_1Z_16cyc], (instregex "^(S|U)DIVXr$")>;
+
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
+ (instregex "^(S|U)MULLv.*$")>;
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
+ (instregex "^(S|U)(MLAL|MLSL)v.*$")>;
// Move and Shift Instructions
// -----------------------------------------------------------------------------
@@ -509,6 +609,11 @@ def : InstRW<[FalkorWr_1XYZ_1cyc], (i
def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^ADRP?$")>;
def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^MOVN(W|X)i$")>;
def : InstRW<[FalkorWr_MOVZ], (instregex "^MOVZ(W|X)i$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs MOVi32imm, MOVi64imm)>;
+def : InstRW<[WriteSequence<[FalkorWr_1XYZ_1cyc, FalkorWr_1XYZ_1cyc]>],
+ (instrs MOVaddr, MOVaddrBA, MOVaddrCP, MOVaddrEXT, MOVaddrJT, MOVaddrTLS)>;
+def : InstRW<[WriteSequence<[FalkorWr_1LD_3cyc, FalkorWr_1XYZ_1cyc]>],
+ (instrs LOADgot)>;
// Other Instructions
// -----------------------------------------------------------------------------
@@ -518,13 +623,12 @@ def : InstRW<[FalkorWr_1ST_0cyc], (i
def : InstRW<[FalkorWr_1Z_0cyc], (instrs MSRpstateImm1, MSRpstateImm4)>;
def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^(LDAR(B|H|W|X)|LDAXP(W|X)|LDAXR(B|H|W|X)|LDXP(W|X)|LDXR(B|H|W|X))$")>;
-def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS)>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS, MOVbaseTLS)>;
def : InstRW<[FalkorWr_1LD_1Z_3cyc], (instrs DRPS)>;
def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>;
-def : InstRW<[WriteVST], (instrs STNPDi, STNPSi)>;
-def : InstRW<[WriteSTP], (instrs STNPWi, STNPXi)>;
+def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs STNPWi, STNPXi)>;
def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>;
def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDC.*$")>;
@@ -534,15 +638,16 @@ def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc]
def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXP(W|X)$")>;
def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXR(B|H|W|X)$")>;
-def : InstRW<[WriteVST, WriteVST], (instrs STNPQi)>;
// Store Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[WriteST], (instregex "^STP(W|X)i$")>;
-def : InstRW<[WriteST, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>;
-def : InstRW<[WriteST], (instregex "^STR(BB|HH|W|X)ui$")>;
-def : InstRW<[WriteST, WriteAdr], (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
-def : InstRW<[WriteST], (instregex "^STUR(Q|D|S|BB|HH)i$")>;
-def : InstRW<[WriteST], (instregex "^STTR(B|H|W|X)i$")>;
-def : InstRW<[WriteST], (instregex "^STUR(B|H|W|X)i$")>;
-def : InstRW<[FalkorWr_STRro], (instregex "^STR(B|H|W|X)ro(W|X)$")>;
+def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STP(W|X)i$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1SD_1ST_0cyc],
+ (instregex "^STP(W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STR(BB|HH|W|X)ui$")>;
+def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1SD_1ST_0cyc],
+ (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_STRro], (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
+def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STTR(B|H|W|X)i$")>;
+def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STUR(BB|HH|W|X)i$")>;
+
Modified: llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td?rev=304108&r1=304107&r2=304108&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td Sun May 28 16:48:31 2017
@@ -26,6 +26,21 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
+// Define 0 micro-op types
+def FalkorWr_none_1cyc : SchedWriteRes<[]> {
+ let Latency = 1;
+ let NumMicroOps = 0;
+}
+def FalkorWr_none_3cyc : SchedWriteRes<[]> {
+ let Latency = 3;
+ let NumMicroOps = 0;
+}
+def FalkorWr_none_4cyc : SchedWriteRes<[]> {
+ let Latency = 4;
+ let NumMicroOps = 0;
+}
+
+//===----------------------------------------------------------------------===//
// Define 1 micro-op types
def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; }
@@ -49,6 +64,7 @@ def FalkorWr_1VXVY_4cyc : SchedWriteRes<
def FalkorWr_VMUL32_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
def FalkorWr_FMUL32_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
+def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
def FalkorWr_FMUL64_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; }
@@ -163,11 +179,13 @@ def FalkorWr_1Z_1XY_0cyc : SchedWriteRes
def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
let Latency = 8;
+ let NumMicroOps = 2;
let ResourceCycles = [2, 8];
}
def FalkorWr_1X_1Z_16cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
let Latency = 16;
+ let NumMicroOps = 2;
let ResourceCycles = [2, 16];
}
@@ -309,6 +327,12 @@ def FalkorWr_2LD_1ST_1SD_3cyc: SchedWrit
let NumMicroOps = 4;
}
+def FalkorWr_2VSD_2ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 4;
+}
+
//===----------------------------------------------------------------------===//
// Define 5 micro-op types
@@ -335,7 +359,12 @@ def FalkorWr_1XYZ_2ST_2VSD_0cyc: SchedWr
let Latency = 0;
let NumMicroOps = 5;
}
-
+def FalkorWr_1VXVY_2ST_2VSD_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitST,
+ FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 5;
+}
//===----------------------------------------------------------------------===//
// Define 6 micro-op types
@@ -352,6 +381,20 @@ def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWr
let NumMicroOps = 6;
}
+def FalkorWr_2VXVY_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitVXVY,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 6;
+}
+
+def FalkorWr_3VSD_3ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 6;
+}
+
//===----------------------------------------------------------------------===//
// Define 8 micro-op types
@@ -363,6 +406,14 @@ def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:Sc
let NumMicroOps = 8;
}
+def FalkorWr_4VSD_4ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 8;
+}
+
//===----------------------------------------------------------------------===//
// Define 9 micro-op types
@@ -384,6 +435,31 @@ def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4c
let NumMicroOps = 9;
}
+//===----------------------------------------------------------------------===//
+// Define 10 micro-op types
+
+def FalkorWr_2VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitVXVY,
+ FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 10;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 12 micro-op types
+
+def FalkorWr_4VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitVXVY,
+ FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitVXVY, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitVXVY,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 12;
+}
+
// Forwarding logic is modeled for multiply add/accumulate.
// -----------------------------------------------------------------------------
def FalkorReadIMA32 : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>;
More information about the llvm-commits
mailing list