[llvm] r299456 - [AArch64] Refine Falkor Machine Model - Part 2

Balaram Makam via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 4 11:42:14 PDT 2017


Author: bmakam
Date: Tue Apr  4 13:42:14 2017
New Revision: 299456

URL: http://llvm.org/viewvc/llvm-project?rev=299456&view=rev
Log:
[AArch64] Refine Falkor Machine Model - Part 2

Modified:
    llvm/trunk/lib/Target/AArch64/AArch64SchedFalkor.td
    llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorDetails.td
    llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td

Modified: llvm/trunk/lib/Target/AArch64/AArch64SchedFalkor.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64SchedFalkor.td?rev=299456&r1=299455&r2=299456&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64SchedFalkor.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64SchedFalkor.td Tue Apr  4 13:42:14 2017
@@ -42,7 +42,11 @@ let SchedModel = FalkorModel in {
   def FalkorUnitVX   : ProcResource<1>; // Vector X-pipe
   def FalkorUnitVY   : ProcResource<1>; // Vector Y-pipe
 
+  def FalkorUnitGTOV : ProcResource<1>; // Scalar to Vector
+  def FalkorUnitVTOG : ProcResource<1>; // Vector to Scalar
+
   // Define the resource groups.
+  def FalkorUnitXY   : ProcResGroup<[FalkorUnitX, FalkorUnitY]>;
   def FalkorUnitXYZ  : ProcResGroup<[FalkorUnitX, FalkorUnitY, FalkorUnitZ]>;
   def FalkorUnitXYZB : ProcResGroup<[FalkorUnitX, FalkorUnitY, FalkorUnitZ,
                                      FalkorUnitB]>;
@@ -78,7 +82,7 @@ def : WriteRes<WriteLD,    [FalkorUnitLD
 def : WriteRes<WriteST,    [FalkorUnitLD, FalkorUnitST, FalkorUnitSD]>
       { let Latency = 3; let NumMicroOps = 3; }
 def : WriteRes<WriteSTP,   [FalkorUnitST, FalkorUnitSD]>
-      { let Latency = 3; let NumMicroOps = 2; }
+      { let Latency = 0; let NumMicroOps = 2; }
 def : WriteRes<WriteAdr,   [FalkorUnitXYZ]> { let Latency = 5; }
 def : WriteRes<WriteLDIdx, [FalkorUnitLD]> { let Latency = 5; }
 def : WriteRes<WriteSTIdx, [FalkorUnitLD, FalkorUnitST, FalkorUnitSD]>
@@ -95,7 +99,8 @@ def : WriteRes<WriteFDiv,  [FalkorUnitVX
       { let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1
 def : WriteRes<WriteV,     [FalkorUnitVXVY]> { let Latency = 6; }
 def : WriteRes<WriteVLD,   [FalkorUnitLD]> { let Latency = 3; }
-def : WriteRes<WriteVST,   [FalkorUnitST]> { let Latency = 0; }
+def : WriteRes<WriteVST,   [FalkorUnitST, FalkorUnitVSD]>
+      { let Latency = 0; let NumMicroOps = 2; }
 
 def : WriteRes<WriteSys,     []> { let Latency = 1; }
 def : WriteRes<WriteBarrier, []> { let Latency = 1; }

Modified: llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorDetails.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorDetails.td?rev=299456&r1=299455&r2=299456&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorDetails.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorDetails.td Tue Apr  4 13:42:14 2017
@@ -70,82 +70,400 @@ def : InstRW<[FalkorWr_2VXVY_6cyc],   (i
 
 def : InstRW<[FalkorWr_3VXVY_4cyc],   (instregex "^(FCVTX?N|FCVTX?N2)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>;
 
-def : InstRW<[FalkorWr_3VXVY_5cyc],   (instregex "^(FCVTX?N|FCVTX?N2)(v2i32|v4i16|v8i16)$")>;
+def : InstRW<[FalkorWr_3VXVY_5cyc],   (instregex "^(FCVTX?N|FCVTX?N2)(v2i32|v4i16|v4i32|v8i16|v4f32)$")>;
 
 def : InstRW<[FalkorWr_2VX_2VY_2cyc], (instregex "^(FDIV|FSQRT)(v2f64|v4f32|v8f16)$")>;
 
+// SIMD Integer Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^ADD(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs ADDPv2i64p)>;
+def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(AND|ORR|ORN|BIC|EOR)v8i8$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(BIC|ORR)(v2i32|v4i16)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^NEG(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^SUB(v1i64|v2i32|v4i16|v8i8)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v2i32|v4i16|v8i8)(_v.*)?$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(S|U)SHLv1i64$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(S|U)SHR(v2i32|v4i16|v8i8)_shift$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^((S|U)?(MAX|MIN)P?|ABS|ADDP|CM(EQ|GE|HS|GT|HI))(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^CM(EQ|GE|HS|GT|HI)(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^CM(EQ|LE|GE|GT|LT)(v1i64|v2i32|v4i16|v8i8)rz$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^CMTST(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc],   (instrs PMULv8i8)>;
+def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^SHL(v2i32|v4i16|v8i8)_shift$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^SQNEG(v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)(ABD|ADALP)(v8i8|v4i16|v2i32)(_v.*)?$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)ADDLVv4i16v$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)QSHLU?(d|s|h|b|(v8i8|v4i16|v2i32)_shift)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)(QSHL|RSHL|QRSHL)(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(s|h|b)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)QSUB(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)RHADD(v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)RSHR(v2i32|v4i16|v8i8)_shift$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)?(MAX|MIN)V(v4i16v|v4i32v)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs ADDVv4i16v)>;
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^S(L|R)I(d|(v8i8|v4i16|v2i32)_shift)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^SQABS(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^SQNEG(v1i8|v1i16|v1i32|v1i64)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^(S|U)ADDLVv8i8v$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v)$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc],   (instrs ADDVv8i8v)>;
+def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^SQDMU?L(A|S)?L()v.*$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^SQRDML(A|S)?H(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc],   (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>;
+
+def : InstRW<[FalkorWr_2VXVY_3cyc],   (instrs ADDVv4i32v)>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc],   (instrs ADDVv8i16v)>;
+def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^(ADD|SUB)HNv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^(S|U)ABA(v2i32|v4i16|v8i8)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_5cyc],   (instrs ADDVv16i8v)>;
+
+def : InstRW<[FalkorWr_2VXVY_6cyc],   (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32)_shift?$")>;
+def : InstRW<[FalkorWr_2VXVY_6cyc],   (instregex "^R(ADD|SUB)HNv.*$")>;
+
+def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^ADD(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs ADDPv2i64)>; // sz==11
+def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(AND|ORR|ORN|BIC|EOR)v16i8$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(BIC|ORR)(v8i16|v4i32)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(NEG|SUB)(v16i8|v8i16|v4i32|v2i64)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)ADDLv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v16i8|v2i64|v4i32|v8i16)(_v.*)?$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)SHR(v16i8|v8i16|v4i32|v2i64)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)SUBLv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^((S|U)?(MAX|MIN)P?|ABS)(v16i8|v2i64|v4i32|v8i16)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^ADDP(v4i32|v8i16|v16i8)$")>; // sz!=11
+def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^CM(EQ|GE|HS|GT|HI)(v16i8|v2i64|v4i32|v8i16)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^CM(EQ|LE|GE|GT|LT)(v16i8|v2i64|v4i32|v8i16)rz$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(CMTST|PMUL)(v16i8|v2i64|v4i32|v8i16)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^PMULL2?(v8i8|v16i8)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^SHL(v16i8|v8i16|v4i32|v2i64)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
+
+def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)R?SRA(v2i64|v4i32|v8i16|v16i8)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)ABD(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)ABDLv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)(ADALP|QADD)(v16i8|v8i16|v4i32|v2i64)(_v.*)?$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)QSHLU?(v2i64|v4i32|v8i16|v16i8)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)(QSHL|RSHL|QRSHL|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)RSHR(v2i64|v4i32|v8i16|v16i8)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^PMULL2?(v1i64|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^SQD(MLAL|MLSL|MULL)v.*$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>;
+
+def : InstRW<[FalkorWr_3VXVY_3cyc],   (instregex "^(S|U)ADDLVv4i32v$")>;
+
+def : InstRW<[FalkorWr_3VXVY_5cyc],   (instregex "^(S|U)ADDLVv8i16v$")>;
+
+def : InstRW<[FalkorWr_3VXVY_6cyc],   (instregex "^(S|U)ADDLVv16i8v$")>;
+
+def : InstRW<[FalkorWr_4VXVY_2cyc],   (instregex "^(S|U)(ADD|SUB)Wv.*$")>;
+
+def : InstRW<[FalkorWr_4VXVY_3cyc],   (instregex "^(S|U)ABALv.*$")>;
+
+def : InstRW<[FalkorWr_4VXVY_4cyc],   (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>;
+
 // SIMD Load Instructions
 // -----------------------------------------------------------------------------
-def : InstRW<[WriteVLD],                (instregex "LD1i(8|16|32|64)$")>;
-def : InstRW<[WriteVLD],                (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_1LD_1VXVY_4cyc], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_2LD_3cyc],       (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_3LD_3cyc],       (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_4LD_3cyc],       (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVLD, WriteAdr],                (instregex "LD1i(8|16|32|64)_POST$")>;
-def : InstRW<[WriteVLD, WriteAdr],                (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr],       (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr],       (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr],       (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[WriteVLD], (instregex "LD2i(8|16|32|64)$")>;
-def : InstRW<[WriteVLD], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVLD], (instregex "LD2Twov(8b|4h|2s)$")>;
-def : InstRW<[WriteVLD], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVLD, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
-def : InstRW<[WriteVLD, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
-def : InstRW<[WriteVLD, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
-def : InstRW<[WriteVLD, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
-
-def : InstRW<[WriteVLD], (instregex "LD3i(8|16|32|64)$")>;
-def : InstRW<[WriteVLD], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVLD], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
-def : InstRW<[WriteVLD], (instregex "LD3Threev(2d)$")>;
-def : InstRW<[WriteVLD, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
-def : InstRW<[WriteVLD, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[WriteVLD, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
-def : InstRW<[WriteVLD, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
-
-def : InstRW<[WriteVLD], (instregex "LD4i(8|16|32|64)$")>;
-def : InstRW<[WriteVLD], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVLD], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
-def : InstRW<[WriteVLD], (instregex "LD4Fourv(2d)$")>;
-def : InstRW<[WriteVLD, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
-def : InstRW<[WriteVLD, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[WriteVLD, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
-def : InstRW<[WriteVLD, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
+def : InstRW<[WriteVLD],                               (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>;
+def : InstRW<[WriteVLD],                               (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD],                               (instrs LD2i64)>;
+def : InstRW<[WriteVLD, WriteAdr],                     (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>;
+def : InstRW<[WriteVLD, WriteAdr],                     (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteVLD, WriteAdr],                     (instrs LD2i64_POST)>;
+
+def : InstRW<[FalkorWr_1LD_1VXVY_4cyc],                (instregex "LD1i(8|16|32)$")>;
+def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, WriteAdr],      (instregex "LD1i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_1none_3cyc],                (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc],                (instregex "^LD2Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc],                (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr],      (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr],      (instregex "^LD2Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr],      (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_3cyc],                      (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc],                      (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc],                      (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc],                      (instrs LD3i64)>;
+def : InstRW<[FalkorWr_2LD_3cyc],                      (instrs LD4i64)>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr],            (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr],            (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr],            (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr],            (instrs LD3i64_POST)>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr],            (instrs LD4i64_POST)>;
+
+def : InstRW<[FalkorWr_1LD_2VXVY_4cyc],                (instregex "^LD2i(8|16|32)$")>;
+def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, WriteAdr],      (instregex "^LD2i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_1none_3cyc],                (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_1none_3cyc],                (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_1none_3cyc, WriteAdr],      (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_1none_3cyc, WriteAdr],      (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_3LD_3cyc],                      (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_3LD_3cyc],                      (instrs LD3Threev2d)>;
+def : InstRW<[FalkorWr_3LD_3cyc],                      (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr],            (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr],            (instrs LD3Threev2d_POST)>;
+def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr],            (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_3VXVY_4cyc],                (instregex "LD3i(8|16|32)$")>;
+def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, WriteAdr],      (instregex "LD3i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2none_3cyc],                (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_2none_3cyc],                (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_2none_3cyc, WriteAdr],      (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_2none_3cyc, WriteAdr],      (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_4LD_3cyc],                      (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_4LD_3cyc],                      (instrs LD4Fourv2d)>;
+def : InstRW<[FalkorWr_4LD_3cyc],                      (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr],            (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr],            (instrs LD4Fourv2d_POST)>;
+def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr],            (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_4VXVY_4cyc],                (instregex "^LD4i(8|16|32)$")>;
+def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, WriteAdr],      (instregex "^LD4i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc],          (instregex "LD3Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, WriteAdr],(instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc],          (instregex "^LD4Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, WriteAdr],(instregex "^LD4Fourv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc],      (instregex "LD3Threev(16b|8h|4s)$")>;
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc],      (instregex "^LD4Fourv(16b|8h|4s)$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, WriteAdr],(instregex "LD3Threev(16b|8h|4s)_POST$")>;
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, WriteAdr],(instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
+
+// Arithmetic and Logical Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_2XYZ_2cyc],    (instregex "^ADD(S)?(W|X)r(s|x)$")>;
+def : InstRW<[FalkorWr_2XYZ_2cyc],    (instregex "^SUB(S)?(W|X)r(s|x)$")>;
+
+// SIMD Miscellaneous Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs TBLv8i8One)>;
+def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs TBLv16i8One)>;
+def : InstRW<[FalkorWr_2VXVY_4cyc],   (instrs TBLv8i8Two)>;
+def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^TBX(v8|v16)i8One$")>;
+def : InstRW<[FalkorWr_3VXVY_5cyc],   (instregex "^TBL(v8i8Three|v16i8Two)$")>;
+def : InstRW<[FalkorWr_3VXVY_5cyc],   (instregex "^TBX(v8i8Two|v16i8Two)$")>;
+def : InstRW<[FalkorWr_4VXVY_6cyc],   (instregex "^TBL(v8i8Four|v16i8Three)$")>;
+def : InstRW<[FalkorWr_4VXVY_6cyc],   (instregex "^TBX(v8i8Three|v16i8Three)$")>;
+def : InstRW<[FalkorWr_5VXVY_7cyc],   (instrs TBLv16i8Four)>;
+def : InstRW<[FalkorWr_5VXVY_7cyc],   (instregex "^TBX(v8i8Four|v16i8Four)$")>;
 
 // SIMD Store Instructions
 // -----------------------------------------------------------------------------
-def : InstRW<[WriteVST], (instregex "ST1i(8|16|32|64)$")>;
-def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVST], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVST], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVST], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[WriteVST], (instregex "ST2i(8|16|32|64)$")>;
-def : InstRW<[WriteVST], (instregex "ST2Twov(8b|4h|2s)$")>;
-def : InstRW<[WriteVST], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[WriteVST], (instregex "ST3i(8|16|32|64)$")>;
-def : InstRW<[WriteVST], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
-def : InstRW<[WriteVST], (instregex "ST3Threev(2d)$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
-
-def : InstRW<[WriteVST], (instregex "ST4i(8|16|32|64)$")>;
-def : InstRW<[WriteVST], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
-def : InstRW<[WriteVST], (instregex "ST4Fourv(2d)$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
-def : InstRW<[WriteVST, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+def : InstRW<[WriteVST],                                                        (instregex "^ST1(One(v8b|v4h|v2s|v1d)(_POST)?|(i8|i16|i32|i64)(_POST)?|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>;
+def : InstRW<[WriteVST],                                                        (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>;
+def : InstRW<[WriteVST, WriteAdr],                                              (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>;
+def : InstRW<[WriteVST, WriteAdr],                                              (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>;
+
+def : InstRW<[WriteVST, WriteVST],                                              (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>;
+def : InstRW<[WriteVST, WriteVST],                                              (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[WriteVST, WriteVST],                                              (instregex "^ST3(i8|i16|i32|i64)$")>;
+def : InstRW<[WriteVST, WriteVST],                                              (instregex "^ST4(i8|i16|i32|i64)$")>;
+def : InstRW<[WriteVST, WriteVST, WriteAdr],                                    (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteAdr],                                    (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteAdr],                                    (instregex "^ST3(i8|i16|i32|i64)_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteAdr],                                    (instregex "^ST4(i8|i16|i32|i64)_POST$")>;
+
+def : InstRW<[WriteV, WriteVST, WriteVST],                                      (instregex "^ST3Three(v8b|v4h|v2s|v1d)$")>;
+def : InstRW<[WriteV, WriteVST, WriteVST, WriteAdr],                            (instregex "^ST3Three(v8b|v4h|v2s|v1d)_POST$")>;
+
+def : InstRW<[WriteVST, WriteVST, WriteVST],                                    (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST],                                    (instrs ST3Threev2d)>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteAdr],                          (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteAdr],                          (instrs ST3Threev2d_POST)>;
+
+def : InstRW<[WriteV, WriteV, WriteVST, WriteVST],                              (instregex "^ST4Four(v8b|v4h|v2s|v1d)$")>;
+def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteAdr],                    (instregex "^ST4Four(v8b|v4h|v2s|v1d)_POST$")>;
+
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST],                          (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST],                          (instrs ST4Fourv2d)>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],                (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],                (instrs ST4Fourv2d_POST)>;
+
+def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST],          (instregex "^ST3Three(v16b|v8h|v4s)$")>;
+def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],(instregex "^ST3Three(v16b|v8h|v4s)_POST$")>;
+
+def : InstRW<[WriteV, WriteV, WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST],          (instregex "^ST4Four(v16b|v8h|v4s)$")>;
+def : InstRW<[WriteV, WriteV, WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],(instregex "^ST4Four(v16b|v8h|v4s)_POST$")>;
+
+// Branch Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1none_0cyc],   (instrs B)>;
+def : InstRW<[FalkorWr_1Z_0cyc],      (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ)(W|X))$")>;
+def : InstRW<[FalkorWr_1ZB_0cyc],     (instrs Bcc)>;
+def : InstRW<[FalkorWr_1XYZB_0cyc],   (instrs BL)>;
+def : InstRW<[FalkorWr_1Z_1XY_0cyc],  (instrs BLR)>;
+
+// Cryptography Extensions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs SHA1Hrr)>;
+def : InstRW<[FalkorWr_1VXVY_2cyc],   (instrs AESIMCrr, AESMCrr)>;
+def : InstRW<[FalkorWr_2VXVY_3cyc],   (instrs AESDrr, AESErr)>;
+def : InstRW<[FalkorWr_2VXVY_2cyc],   (instrs SHA1SU0rrr, SHA1SU1rr, SHA256SU0rr)>;
+def : InstRW<[FalkorWr_1VX_1VY_4cyc], (instregex "^SHA1(C|M|P)rrr$")>;
+def : InstRW<[FalkorWr_1VX_1VY_5cyc], (instrs SHA256H2rrr, SHA256Hrrr)>;
+def : InstRW<[FalkorWr_4VXVY_3cyc],   (instrs SHA256SU1rrr)>;
+
+// FP Load Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[WriteLD],               (instregex "^LDR((Q|D|S|H|B)(post|pre|ui)|(Q|D|S)l)$")>;
+def : InstRW<[WriteLD],               (instregex "^LDUR(Q|D|S|H|B)i$")>;
+def : InstRW<[FalkorWr_1XYZ_1LD_4cyc],(instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc],     (instrs LDNPQi)>;
+def : InstRW<[FalkorWr_2LD_3cyc],     (instregex "^LDPQ(pre|post|i)$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc],(instregex "LDNP(D|S)i$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc],(instregex "LDP(D|S)(pre|post|i)$")>;
+
+// FP Data Processing Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^FCCMP(E)?(H|S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^FCMP(E)?(H|S|D)r(r|i)$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^FCVT(A|M|N|P)(S|U)U(W|X)(H|S|D)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(FABS|FNEG)(H|S|D)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^FCSEL(H|S|D)rrr$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^F(MAX|MIN)(NM)?(H|S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^F(MAX|MIN)(NM)?Pv2i(16|32|64)p$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc],   (instrs FCVTHSr, FCVTHDr)>;
+def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FRINT(A|I|M|N|P|X|Z)(H|S|D)r$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^FABD(16|32|64)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(FADD|FSUB)(H|S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs FCVTSHr, FCVTDHr)>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc],   (instrs FCVTSDr, FCVTDSr)>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc],   (instregex "^F(N)?M(ADD|SUB)(H|S)rrr$")>;
+def : InstRW<[FalkorWr_1VXVY_5cyc],   (instregex "^F(N)?MUL(H|S)rr$")>;
+
+def : InstRW<[FalkorWr_1VXVY_6cyc],   (instregex "^F(N)?M(ADD|SUB)Drrr$")>;
+def : InstRW<[FalkorWr_1VXVY_6cyc],   (instregex "^F(N)?MULDrr$")>;
+
+def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^FDIV(H|S|D)rr$")>;
+def : InstRW<[FalkorWr_1VX_1VY_2cyc], (instregex "^FSQRT(H|S|D)r$")>;
+
+// FP Miscellaneous Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1GTOV_1cyc],   (instregex "^FMOV(HW|HX|SW|DX|DXHigh)r$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^FCVTZ(S|U)(S|U)(W|X)(D|S)ri?$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^FMOV(WH|WS|XH|XD|XDHigh)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^FMOV(Hi|Hr|S0|Si|Sr|D0|Di|Dr|v.*_ns)$")>;
+
+def : InstRW<[FalkorWr_1GTOV_4cyc],   (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^(S|U)CVTF(v1i16|v1i32|v2i32|v1i64|v4i16|v2f32|v4f16|d|s)(_shift)?")>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^(S|U)CVTF(v2i64|v4i32|v8i16|v2f64|v4f32|v8f16)(_shift)?")>;
+
+
+// Load Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1ST_0cyc],     (instrs PRFMui, PRFMl)>;
+def : InstRW<[FalkorWr_1ST_0cyc],     (instrs PRFUMi)>;
+
+def : InstRW<[FalkorWr_1LD_3cyc],     (instregex "^LDNP(W|X)i$")>;
+def : InstRW<[FalkorWr_1LD_3cyc],     (instregex "^LDP(W|X)(i|post|pre)$")>;
+def : InstRW<[FalkorWr_1LD_3cyc],     (instregex "^LDR(B|H|W|X)(post|pre|ui)$")>;
+def : InstRW<[FalkorWr_1LD_3cyc],     (instregex "^LDR(W|X)l$")>;
+def : InstRW<[FalkorWr_1LD_3cyc],     (instregex "^LDTR(B|H|W|X)i$")>;
+def : InstRW<[FalkorWr_1LD_3cyc],     (instregex "^LDUR(B|H|W|X)i$")>;
+
+def : InstRW<[FalkorWr_1LD_4cyc],     (instregex "^LDPSW(i|post|pre)$")>;
+def : InstRW<[FalkorWr_1LD_4cyc],     (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre|ui)$")>;
+def : InstRW<[FalkorWr_1LD_4cyc],     (instrs LDRSWl)>;
+def : InstRW<[FalkorWr_1LD_4cyc],     (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
+def : InstRW<[FalkorWr_1LD_4cyc],     (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
+
+def : InstRW<[FalkorWr_1XYZ_1ST_4cyc],(instregex "^PRFMro(W|X)$")>;
+def : InstRW<[FalkorWr_1XYZ_1LD_4cyc],(instregex "^LDR(B|H|W|X)ro(W|X)$")>;
+
+def : InstRW<[FalkorWr_1XYZ_1LD_5cyc],(instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
+
+// Miscellaneous Data-Processing Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^(S|U)?BFM(W|X)ri$")>;
+def : InstRW<[FalkorWr_1X_2cyc],      (instregex "^CRC32.*$")>;
+def : InstRW<[FalkorWr_1XYZ_2cyc],    (instregex "^(CLS|CLZ|RBIT|REV|REV16|REV32)(W|X)r$")>;
+def : InstRW<[FalkorWr_2XYZ_2cyc],    (instregex "^EXTR(W|X)rri$")>;
+
+// Divide and Multiply Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1X_4cyc],      (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
+def : InstRW<[FalkorWr_1X_4cyc],      (instregex "^M(ADD|SUB)Wrrr$")>;
+
+def : InstRW<[FalkorWr_1X_5cyc],      (instregex "^(S|U)MULHrr$")>;
+def : InstRW<[FalkorWr_1X_5cyc],      (instregex "^M(ADD|SUB)Xrrr$")>;
+
+def : InstRW<[FalkorWr_1X_1Z_8cyc],   (instregex "^(S|U)DIVWr$")>;
+def : InstRW<[FalkorWr_1X_1Z_16cyc],  (instregex "^(S|U)DIVXr$")>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^(S|U)(MLAL|MLSL|MULL)v.*$")>;
+
+// Move and Shift Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^(LSLV|LSRV|ASRV|RORV|MOVK)(W|X).*")>;
+def : InstRW<[FalkorWr_1XYZB_1cyc],   (instregex "^ADRP?$")>;
+def : InstRW<[FalkorWr_1XYZB_1cyc],   (instregex "^MOVN(W|X)i$")>;
+def : InstRW<[FalkorWr_1XYZB_1cyc],   (instregex "^MOVZ(W|X)i$")>;
+
+// Other Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1LD_0cyc],     (instrs CLREX, DMB, DSB)>;
+def : InstRW<[FalkorWr_1none_0cyc],   (instrs BRK, DCPS1, DCPS2, DCPS3, HINT, HLT, HVC, ISB, SMC, SVC)>;
+def : InstRW<[FalkorWr_1ST_0cyc],     (instrs SYSxt, SYSLxt)>;
+def : InstRW<[FalkorWr_1Z_0cyc],      (instrs MSRpstateImm1, MSRpstateImm4)>;
+
+def : InstRW<[FalkorWr_1LD_3cyc],     (instregex "^(LDAR(B|H|W|X)|LDAXP(W|X)|LDAXR(B|H|W|X)|LDXP(W|X)|LDXR(B|H|W|X))$")>;
+def : InstRW<[FalkorWr_1LD_3cyc],     (instrs MRS)>;
+
+def : InstRW<[FalkorWr_1LD_1Z_3cyc],  (instrs DRPS)>;
+
+def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>;
+def : InstRW<[WriteVST],              (instrs STNPDi, STNPSi)>;
+def : InstRW<[WriteSTP],               (instrs STNPWi, STNPXi)>;
+def : InstRW<[FalkorWr_2LD_1Z_3cyc],  (instrs ERET)>;
+
+def : InstRW<[WriteST],               (instregex "^LDC.*$")>;
+def : InstRW<[WriteST],               (instregex "^STLR(B|H|W|X)$")>;
+def : InstRW<[WriteST],               (instregex "^STXP(W|X)$")>;
+def : InstRW<[WriteST],               (instregex "^STXR(B|H|W|X)$")>;
+
+def : InstRW<[WriteSTX],              (instregex "^STLXP(W|X)$")>;
+def : InstRW<[WriteSTX],              (instregex "^STLXR(B|H|W|X)$")>;
+def : InstRW<[WriteVST, WriteVST],    (instrs STNPQi)>;
+
+// Store Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[WriteVST],              (instregex "^STP(D|S)(i|post|pre)$")>;
+def : InstRW<[WriteST],               (instregex "^STP(W|X)(i|post|pre)$")>;
+def : InstRW<[WriteST],               (instregex "^STR(B|H|W|X)(post|pre|ui)$")>;
+def : InstRW<[WriteST],               (instregex "^STTR(B|H|W|X)i$")>;
+def : InstRW<[WriteST],               (instregex "^STUR(B|H|W|X)i$")>;
+
+def : InstRW<[WriteST, WriteAdr],     (instregex "^STR(B|H|W|X)ro(W|X)$")>;
+
+def : InstRW<[WriteVST, WriteVST],    (instregex "^STPQ(i|post|pre)$")>;

Modified: llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td?rev=299456&r1=299455&r2=299456&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td Tue Apr  4 13:42:14 2017
@@ -28,14 +28,15 @@
 def FalkorWr_1X_2cyc    : SchedWriteRes<[FalkorUnitX]>   { let Latency = 2; }
 def FalkorWr_1X_4cyc    : SchedWriteRes<[FalkorUnitX]>   { let Latency = 4; }
 def FalkorWr_1X_5cyc    : SchedWriteRes<[FalkorUnitX]>   { let Latency = 5; }
-def FalkorWr_1Z_1cyc    : SchedWriteRes<[FalkorUnitZ]>   { let Latency = 1; }
-def FalkorWr_1Z_2cyc    : SchedWriteRes<[FalkorUnitZ]>   { let Latency = 2; }
-def FalkorWr_1ZB_1cyc   : SchedWriteRes<[FalkorUnitZB]>  { let Latency = 1; }
+def FalkorWr_1Z_0cyc    : SchedWriteRes<[FalkorUnitZ]>   { let Latency = 0; }
+def FalkorWr_1ZB_0cyc   : SchedWriteRes<[FalkorUnitZB]>  { let Latency = 0; }
 def FalkorWr_1LD_3cyc   : SchedWriteRes<[FalkorUnitLD]>  { let Latency = 3; }
 def FalkorWr_1LD_4cyc   : SchedWriteRes<[FalkorUnitLD]>  { let Latency = 4; }
 def FalkorWr_1XYZ_1cyc  : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; }
 def FalkorWr_1XYZ_2cyc  : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; }
+def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; }
 def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; }
+def FalkorWr_1none_0cyc : SchedWriteRes<[]>              { let Latency = 0; }
 
 def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; }
 def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; }
@@ -44,8 +45,12 @@ def FalkorWr_1VXVY_4cyc : SchedWriteRes<
 def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
 def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
 
-def FalkorWr_1LD_1cyc   : SchedWriteRes<[FalkorUnitLD]>  { let Latency = 1; }
-def FalkorWr_1ST_1cyc   : SchedWriteRes<[FalkorUnitST]>  { let Latency = 1; }
+def FalkorWr_1LD_0cyc   : SchedWriteRes<[FalkorUnitLD]>  { let Latency = 0; }
+def FalkorWr_1ST_0cyc   : SchedWriteRes<[FalkorUnitST]>  { let Latency = 0; }
+
+def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; }
+def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; }
+def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; }
 
 //===----------------------------------------------------------------------===//
 // Define 2 micro-op types
@@ -88,12 +93,7 @@ def FalkorWr_2LD_3cyc   : SchedWriteRes<
   let NumMicroOps = 2;
 }
 
-
-def FalkorWr_1VY_1VX_3cyc : SchedWriteRes<[FalkorUnitVY, FalkorUnitVX]> {
-  let Latency = 3;
-  let NumMicroOps = 2;
-}
-def FalkorWr_1VY_1VX_5cyc : SchedWriteRes<[FalkorUnitVY, FalkorUnitVX]> {
+def FalkorWr_1VX_1VY_5cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
   let Latency = 5;
   let NumMicroOps = 2;
 }
@@ -103,6 +103,11 @@ def FalkorWr_1VX_1VY_2cyc : SchedWriteRe
   let NumMicroOps = 2;
 }
 
+def FalkorWr_1VX_1VY_4cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+  let Latency = 4;
+  let NumMicroOps = 2;
+}
+
 def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
   let Latency = 10;
   let NumMicroOps = 2;
@@ -122,11 +127,36 @@ def FalkorWr_2XYZ_2cyc   : SchedWriteRes
   let NumMicroOps = 2;
 }
 
+def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> {
+  let Latency = 0;
+  let NumMicroOps = 2;
+}
+
+def FalkorWr_1X_1Z_8cyc  : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+}
+
+def FalkorWr_1X_1Z_16cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
+  let Latency = 16;
+  let NumMicroOps = 2;
+}
+
 def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> {
   let Latency = 3;
   let NumMicroOps = 2;
 }
 
+def FalkorWr_1LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD]> {
+  let Latency = 3;
+  let NumMicroOps = 2;
+}
+
+def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> {
+  let Latency = 0;
+  let NumMicroOps = 2;
+}
+
 //===----------------------------------------------------------------------===//
 // Define 3 micro-op types
 
@@ -166,11 +196,6 @@ def FalkorWr_2LD_1Z_3cyc     : SchedWrit
   let Latency = 3;
   let NumMicroOps = 3;
 }
-def FalkorWr_1LD_1ST_1SD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST,
-                                              FalkorUnitSD]> {
-  let Latency = 3;
-  let NumMicroOps = 3;
-}
 
 //===----------------------------------------------------------------------===//
 // Define 4 micro-op types
@@ -208,8 +233,13 @@ def FalkorWr_4LD_3cyc      : SchedWriteR
   let NumMicroOps = 4;
 }
 
-def FalkorWr_1LD_1ST_1SD_1LD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST,
-                                                  FalkorUnitSD, FalkorUnitLD]> {
+def FalkorWr_1LD_3VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
+                                            FalkorUnitVXVY, FalkorUnitVXVY]> {
+  let Latency = 4;
+  let NumMicroOps = 4;
+}
+
+def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
   let Latency = 3;
   let NumMicroOps = 4;
 }
@@ -223,7 +253,7 @@ def FalkorWr_1LD_4VXVY_4cyc: SchedWriteR
   let Latency = 4;
   let NumMicroOps = 5;
 }
-def FalkorWr_2LD_2VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+def FalkorWr_2LD_2VXVY_1none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
                                             FalkorUnitVXVY, FalkorUnitVXVY]> {
   let Latency = 4;
   let NumMicroOps = 5;
@@ -258,6 +288,15 @@ def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:Sc
 //===----------------------------------------------------------------------===//
 // Define 9 micro-op types
 
+def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
+                                             FalkorUnitLD, FalkorUnitVXVY,
+                                             FalkorUnitVXVY, FalkorUnitLD,
+                                             FalkorUnitLD, FalkorUnitXYZ,
+                                             FalkorUnitVXVY, FalkorUnitVXVY]> {
+  let Latency = 4;
+  let NumMicroOps = 9;
+}
+
 def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
                                              FalkorUnitLD, FalkorUnitVXVY,
                                              FalkorUnitVXVY, FalkorUnitXYZ,




More information about the llvm-commits mailing list