[llvm] [AArch64] Fix schedmodel pre/post-index loads and stores for TSV110 (PR #68854)

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 11 23:37:37 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Allen (vfdff)

<details>
<summary>Changes</summary>

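Similar to D159254, this fixes the position of the WriteAdr scheduling write
on pre- and post-indexed (writeback) loads and stores in the TSV110 scheduling
model. On these instructions the updated base register is the first def
operand, so WriteAdr has to be listed first in each InstRW, ahead of the write
that models the loaded or stored data.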

---

Patch is 182.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/68854.diff


2 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64SchedTSV110.td (+37-37) 
- (added) llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-writeback.s (+3954) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
index af4a0176e44ee47..9e5060f1f364965 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
@@ -443,8 +443,8 @@ def : InstRW<[TSV110Wr_4cyc_1LdSt],     (instrs LDRSWl)>;
 def : InstRW<[TSV110Wr_4cyc_1LdSt],     (instregex "^LDR(BB|HH|W|X)ui$")>;
 def : InstRW<[TSV110Wr_4cyc_1LdSt],     (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
 
-def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr],     (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr],     (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1LdSt],     (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1LdSt],     (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
 
 def : InstRW<[TSV110Wr_4cyc_1LdSt],     (instregex "^LDTR(B|H|W|X)i$")>;
 def : InstRW<[TSV110Wr_4cyc_1LdSt],     (instregex "^LDUR(BB|HH|W|X)i$")>;
@@ -453,11 +453,11 @@ def : InstRW<[TSV110Wr_4cyc_1LdSt],     (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
 
 def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi],     (instregex "^LDNP(W|X)i$")>;
 def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi],     (instregex "^LDP(W|X)i$")>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt_1ALUAB, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1LdSt_1ALUAB, WriteLDHi],(instregex "^LDP(W|X)(post|pre)$")>;
 
 def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi],           (instrs LDPSWi)>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpost)>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpre)>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1LdSt, WriteLDHi], (instrs LDPSWpost)>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1LdSt, WriteLDHi], (instrs LDPSWpre)>;
 
 def : InstRW<[TSV110Wr_4cyc_1LdSt],     (instrs PRFMl)>;
 def : InstRW<[TSV110Wr_4cyc_1LdSt],     (instrs PRFUMi)>;
@@ -469,13 +469,13 @@ def : InstRW<[TSV110Wr_4cyc_1LdSt],     (instregex "^PRFMro(W|X)$")>;
 // -----------------------------------------------------------------------------
 
 def : InstRW<[TSV110Wr_1cyc_1LdSt],            (instregex "^STN?P(W|X)i$")>;
-def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr],  (instregex "^STP(W|X)(post|pre)$")>;
+def : InstRW<[WriteAdr, TSV110Wr_1cyc_1LdSt],  (instregex "^STP(W|X)(post|pre)$")>;
 def : InstRW<[TSV110Wr_1cyc_1LdSt],            (instregex "^STUR(BB|HH|W|X)i$")>;
 def : InstRW<[TSV110Wr_1cyc_1LdSt],            (instregex "^STTR(B|H|W|X)i$")>;
 def : InstRW<[TSV110Wr_1cyc_1LdSt],            (instregex "^STR(BB|HH|W|X)ui$")>;
 
-def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr],  (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
-def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr],  (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
+def : InstRW<[WriteAdr, TSV110Wr_1cyc_1LdSt],  (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
+def : InstRW<[WriteAdr, TSV110Wr_1cyc_1LdSt],  (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
 
 
 // FP Data Processing Instructions
@@ -524,11 +524,11 @@ def : InstRW<[TSV110Wr_2cyc_1F],      (instregex "^FMOV[SD][ir]$")>;
 
 def : InstRW<[TSV110Wr_5cyc_1LdSt],                      (instregex "^LDR[DSQ]l")>;
 def : InstRW<[TSV110Wr_5cyc_1LdSt],                      (instregex "^LDUR[BDHSQ]i")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr],            (instregex "^LDR[BDHSQ](post|pre)")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1LdSt],            (instregex "^LDR[BDHSQ](post|pre)")>;
 def : InstRW<[TSV110Wr_5cyc_1LdSt],                      (instregex "^LDR[BDHSQ]ui")>;
 def : InstRW<[TSV110Wr_6cyc_1LdSt_1ALUAB, ReadAdrBase],  (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
 def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi],           (instregex "^LDN?P[DQS]i")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi, WriteAdr], (instregex "^LDP[DQS](post|pre)")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1LdSt, WriteLDHi], (instregex "^LDP[DQS](post|pre)")>;
 
 
 // FP Store Instructions
@@ -539,7 +539,7 @@ def : InstRW<[TSV110Wr_1cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ](
 def : InstRW<[TSV110Wr_1cyc_1LdSt],                     (instregex "^STR[BHSDQ]ui")>;
 def : InstRW<[TSV110Wr_2cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ]ro[WX]")>;
 def : InstRW<[TSV110Wr_2cyc_2LdSt],                     (instregex "^STN?P[SDQ]i")>;
-def : InstRW<[TSV110Wr_2cyc_2LdSt, WriteAdr],           (instregex "^STP[SDQ](post|pre)")>;
+def : InstRW<[WriteAdr, TSV110Wr_2cyc_2LdSt],           (instregex "^STP[SDQ](post|pre)")>;
 
 
 // ASIMD Integer Instructions
@@ -704,70 +704,70 @@ def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[FU](RECP|RSQRT)(E|X)v")>;
 // -----------------------------------------------------------------------------
 
 def : InstRW<[TSV110Wr_7cyc_1F_1LdSt],            (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr],  (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_7cyc_1F_1LdSt],  (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
 def : InstRW<[TSV110Wr_7cyc_2F_1LdSt],            (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr],  (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_7cyc_2F_1LdSt],  (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
 def : InstRW<[TSV110Wr_8cyc_3F_1LdSt],            (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr],  (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_8cyc_3F_1LdSt],  (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
 def : InstRW<[TSV110Wr_8cyc_3F_2LdSt],            (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr],  (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_8cyc_3F_2LdSt],  (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
 
 def  : InstRW<[TSV110Wr_7cyc_1F_1LdSt],           (instregex "LD1i(8|16|32|64)$")>;
-def  : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def  : InstRW<[WriteAdr, TSV110Wr_7cyc_1F_1LdSt], (instregex "LD1i(8|16|32|64)_POST$")>;
 def  : InstRW<[TSV110Wr_7cyc_2F_1LdSt],           (instregex "LD2i(8|16|32|64)$")>;
-def  : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
+def  : InstRW<[WriteAdr, TSV110Wr_7cyc_2F_1LdSt], (instregex "LD2i(8|16|32|64)_POST$")>;
 def  : InstRW<[TSV110Wr_8cyc_3F_1LdSt],           (instregex "LD3i(8|16|32|64)$")>;
-def  : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+def  : InstRW<[WriteAdr, TSV110Wr_8cyc_3F_1LdSt], (instregex "LD3i(8|16|32|64)_POST$")>;
 def  : InstRW<[TSV110Wr_8cyc_3F_2LdSt],           (instregex "LD4i(8|16|32|64)$")>;
-def  : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+def  : InstRW<[WriteAdr, TSV110Wr_8cyc_3F_2LdSt], (instregex "LD4i(8|16|32|64)_POST$")>;
 
 def : InstRW<[TSV110Wr_5cyc_1LdSt],               (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr],     (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1LdSt],     (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
 def : InstRW<[TSV110Wr_5cyc_1LdSt],               (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr],     (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1LdSt],     (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
 def : InstRW<[TSV110Wr_6cyc_3LdSt],               (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_6cyc_3LdSt, WriteAdr],     (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_6cyc_3LdSt],     (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
 def : InstRW<[TSV110Wr_6cyc_2LdSt],               (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_6cyc_2LdSt, WriteAdr],     (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_6cyc_2LdSt],     (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
 
 def : InstRW<[TSV110Wr_7cyc_2F_1LdSt],            (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr],  (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_7cyc_2F_1LdSt],  (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
 
 def : InstRW<[TSV110Wr_8cyc_3F_1LdSt],            (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr],  (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_8cyc_3F_1LdSt],  (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
 
 def : InstRW<[TSV110Wr_10cyc_4F_4LdSt],           (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_10cyc_4F_4LdSt, WriteAdr], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_10cyc_4F_4LdSt], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
 
 
 // ASIMD Store Instructions
 // -----------------------------------------------------------------------------
 
 def  : InstRW<[TSV110Wr_3cyc_1F],             (instregex "ST1i(8|16|32|64)$")>;
-def  : InstRW<[TSV110Wr_3cyc_1F, WriteAdr],   (instregex "ST1i(8|16|32|64)_POST$")>;
+def  : InstRW<[WriteAdr, TSV110Wr_3cyc_1F],   (instregex "ST1i(8|16|32|64)_POST$")>;
 def  : InstRW<[TSV110Wr_4cyc_1F],             (instregex "ST2i(8|16|32|64)$")>;
-def  : InstRW<[TSV110Wr_4cyc_1F, WriteAdr],   (instregex "ST2i(8|16|32|64)_POST$")>;
+def  : InstRW<[WriteAdr, TSV110Wr_4cyc_1F],   (instregex "ST2i(8|16|32|64)_POST$")>;
 def  : InstRW<[TSV110Wr_5cyc_1F],             (instregex "ST3i(8|16|32|64)$")>;
-def  : InstRW<[TSV110Wr_5cyc_1F, WriteAdr],   (instregex "ST3i(8|16|32|64)_POST$")>;
+def  : InstRW<[WriteAdr, TSV110Wr_5cyc_1F],   (instregex "ST3i(8|16|32|64)_POST$")>;
 def  : InstRW<[TSV110Wr_6cyc_1F],             (instregex "ST4i(8|16|32|64)$")>;
-def  : InstRW<[TSV110Wr_6cyc_1F, WriteAdr],   (instregex "ST4i(8|16|32|64)_POST$")>;
+def  : InstRW<[WriteAdr, TSV110Wr_6cyc_1F],   (instregex "ST4i(8|16|32|64)_POST$")>;
 
 def : InstRW<[TSV110Wr_3cyc_1F],              (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr],    (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_3cyc_1F],    (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
 def : InstRW<[TSV110Wr_4cyc_1F],              (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr],    (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1F],    (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
 def : InstRW<[TSV110Wr_5cyc_1F],              (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr],    (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1F],    (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
 def : InstRW<[TSV110Wr_6cyc_1F],              (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr],    (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_6cyc_1F],    (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
 
 def : InstRW<[TSV110Wr_4cyc_1F],              (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr],    (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1F],    (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
 
 def : InstRW<[TSV110Wr_5cyc_1F],              (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr],    (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1F],    (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
 
 def : InstRW<[TSV110Wr_8cyc_1F],              (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_8cyc_1F, WriteAdr],    (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_8cyc_1F],    (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
 
 } // SchedModel = TSV110Model
diff --git a/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-writeback.s b/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-writeback.s
new file mode 100644
index 000000000000000..2738f0b54c243ab
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-writeback.s
@@ -0,0 +1,3954 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=tsv110 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=1 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN G01
+ld1  { v1.1d }, [x27], #8
+ld1  { v1.2d }, [x27], #16
+ld1  { v1.2s }, [x27], #8
+ld1  { v1.4h }, [x27], #8
+ld1  { v1.4s }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G02
+ld1  { v1.8b }, [x27], #8
+ld1  { v1.8h }, [x27], #16
+ld1  { v1.16b }, [x27], #16
+ld1  { v1.1d }, [x27], x28
+ld1  { v1.2d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G03
+ld1  { v1.2s }, [x27], x28
+ld1  { v1.4h }, [x27], x28
+ld1  { v1.4s }, [x27], x28
+ld1  { v1.8b }, [x27], x28
+ld1  { v1.8h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G04
+ld1  { v1.16b }, [x27], x28
+ld1  { v1.1d, v2.1d }, [x27], #16
+ld1  { v1.2d, v2.2d }, [x27], #32
+ld1  { v1.2s, v2.2s }, [x27], #16
+ld1  { v1.4h, v2.4h }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G05
+ld1  { v1.4s, v2.4s }, [x27], #32
+ld1  { v1.8b, v2.8b }, [x27], #16
+ld1  { v1.8h, v2.8h }, [x27], #32
+ld1  { v1.16b, v2.16b }, [x27], #32
+ld1  { v1.1d, v2.1d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G06
+ld1  { v1.2d, v2.2d }, [x27], x28
+ld1  { v1.2s, v2.2s }, [x27], x28
+ld1  { v1.4h, v2.4h }, [x27], x28
+ld1  { v1.4s, v2.4s }, [x27], x28
+ld1  { v1.8b, v2.8b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G07
+ld1  { v1.8h, v2.8h }, [x27], x28
+ld1  { v1.16b, v2.16b }, [x27], x28
+ld1  { v1.1d, v2.1d, v3.1d }, [x27], #24
+ld1  { v1.2d, v2.2d, v3.2d }, [x27], #48
+ld1  { v1.2s, v2.2s, v3.2s }, [x27], #24
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G08
+ld1  { v1.4h, v2.4h, v3.4h }, [x27], #24
+ld1  { v1.4s, v2.4s, v3.4s }, [x27], #48
+ld1  { v1.8b, v2.8b, v3.8b }, [x27], #24
+ld1  { v1.8h, v2.8h, v3.8h }, [x27], #48
+ld1  { v1.16b, v2.16b, v3.16b }, [x27], #48
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G09
+ld1  { v1.1d, v2.1d, v3.1d }, [x27], x28
+ld1  { v1.2d, v2.2d, v3.2d }, [x27], x28
+ld1  { v1.2s, v2.2s, v3.2s }, [x27], x28
+ld1  { v1.4h, v2.4h, v3.4h }, [x27], x28
+ld1  { v1.4s, v2.4s, v3.4s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G10
+ld1  { v1.8b, v2.8b, v3.8b }, [x27], x28
+ld1  { v1.8h, v2.8h, v3.8h }, [x27], x28
+ld1  { v1.16b, v2.16b, v3.16b }, [x27], x28
+ld1  { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+ld1  { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G11
+ld1  { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+ld1  { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+ld1  { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+ld1  { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+ld1  { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G12
+ld1  { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+ld1  { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+ld1  { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+ld1  { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+ld1  { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G13
+ld1  { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+ld1  { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+ld1  { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+ld1  { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+ld1  { v1.b }[0], [x27], #1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G14
+ld1  { v1.b }[8], [x27], #1
+ld1  { v1.b }[0], [x27], x28
+ld1  { v1.b }[8], [x27], x28
+ld1  { v1.h }[0], [x27], #2
+ld1  { v1.h }[4], [x27], #2
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G15
+ld1  { v1.h }[0], [x27], x28
+ld1  { v1.h }[4], [x27], x28
+ld1  { v1.s }[0], [x27], #4
+ld1  { v1.s }[0], [x27], x28
+ld1  { v1.d }[0], [x27], #8
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G16
+ld1  { v1.d }[0], [x27], x28
+ld1r  { v1.1d }, [x27], #8
+ld1r  { v1.2d }, [x27], #8
+ld1r  { v1.2s }, [x27], #4
+ld1r  { v1.4h }, [x27], #2
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G17
+ld1r  { v1.4s }, [x27], #4
+ld1r  { v1.8b }, [x27], #1
+ld1r  { v1.8h }, [x27], #2
+ld1r  { v1.16b }, [x27], #1
+ld1r  { v1.1d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G18
+ld1r  { v1.2d }, [x27], x28
+ld1r  { v1.2s }, [x27], x28
+ld1r  { v1.4h }, [x27], x28
+ld1r  { v1.4s }, [x27], x28
+ld1r  { v1.8b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G19
+ld1r  { v1.8h }, [x27], x28
+ld1r  { v1.16b }, [x27], x28
+ld2  { v1.2d, v2.2d }, [x27], #32
+ld2  { v1.2s, v2.2s }, [x27], #16
+ld2  { v1.4h, v2.4h }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G20
+ld2  { v1.4s, v2.4s }, [x27], #32
+ld2  { v1.8b, v2.8b }, [x27], #16
+ld2  { v1.8h, v2.8h }, [x27], #32
+ld2  { v1.16b, v2.16b }, [x27], #32
+ld2  { v1.2d, v2.2d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G21
+ld2  { v1.2s, v2.2s }, [x27], x28
+ld2  { v1.4h, v2.4h }, [x27], x28
+ld2  { v1.4s, v2.4s }, [x27], x28
+ld2  { v1.8b, v2.8b }, [x27], x28
+ld2  { v1.8h, v2.8h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G22
+ld2  { v1.16b, v2.16b }, [x27], x28
+ld2  { v1.b, v2.b }[0], [x27], #2
+ld2  { v1.b, v2.b }[8], [x27], #2
+ld2  { v1.b, v2.b }[0], [x27], x28
+ld2  { v1.b, v2.b }[8], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G23
+ld2  { v1.h, v2.h }[0], [x27], #4
+ld2  { v1.h, v2.h }[4], [x27], #4
+ld2  { v1.h, v2.h }[0], [x27], x28
+ld2  { v1.h, v2.h }[4], [x27], x28
+ld2  { v1.s, v2.s }[0], [x27], #8
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G24
+ld2  { v1.s, v2.s }[0], [x27], x28
+ld2  { v1.d, v2.d }[0], [x27], #16
+ld2  { v1.d, v2.d }[0], [x27], x28
+ld2r  { v1.1d, v2.1d }, [x27], #16
+ld2r  { v1.2d, v2.2d }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G25
+ld2r  { v1.2s, v2.2s }, [x27], #8
+ld2r  { v1.4h, v2.4h }, [x27], #4
+ld2r  { v1.4s, v2.4s }, [x27], #8
+ld2r  { v1.8b, v2.8b }, [x27], #2
+ld2r  { v1.8h, v2.8h }, [x27], #4
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G26
+ld2r  { v1.16b, v2.16b }, [x27], #2
+ld2r  { v1.1d, v2.1d }, [x27], x28
+ld2r  { v1.2d, v2.2d }, [x27], x28
+ld2r  { v1.2s, v2.2s }, [x27], x28
+ld2r  { v1.4h, v2.4h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G27
+ld2r  { v1.4s, v2.4s }, [x27], x28
+ld2r  { v1.8b, v2.8b }, [x27], x28
+ld2r  { v1.8h, v2.8h }, [x27], x28
+ld2r  { v1.16b, v2.16b }, [x27], x28
+ld3  { v1.2d, v2.2d, v3.2d }, [x27], #48
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G28
+ld3  { v1.2s, v2.2s, v3.2s }, [x27], #24
+ld3  { v1.4h, v2.4h, v3.4h }, [x27], #24
+ld3  { v1.4s, v2.4s, v3.4s }, [x27], #48
+ld3  { v1.8b, v2.8b, v3.8b }, [x27], #24
+ld3  { v1.8h, v2.8h, v3.8h }, [x27], #48
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G29
+ld3  { v1.16b, v2.16b, v3.16b }, [x27], #48
+ld3  { v1.2d, v2.2d, v3.2d }, [x27], x28
+ld3  { v1.2s, v2.2s, v3.2s }, [x27], x28
+ld3  { v1.4h, v2.4h, v3.4h }, [x27], x28
+ld3  { v1.4s, v2.4s, v3.4s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G30
+ld3  { v1.8b, v2.8b, v3.8b }, [x27], x28
+ld3  { v1.8h, v2.8h, v3.8h }, [x27], x28
+ld3  { v1.16b, v2.16b, v3.16b }, [x27], x28
+ld3  { v1.b, v2.b, v3.b }[0], [x27], #3
+ld3  { v1.b, v2.b, v3.b }[8], [x27], #3
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G31
+ld3  { v1.b, v2.b, v3.b }[0], [x27], x28
+ld3  { v1.b, v2.b, v3.b }[8], [x27], x28
+ld3  { v1.h, v2.h, v3.h }[0], [x27], #6
+ld3  { v1.h, v2.h, v3.h }[4], [x27], #6
+ld3  { v1.h, v2.h, v3.h }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G32
+ld3  { v1.h, v2.h, v3.h }[4], [x27], x28
+ld3  { v1.s, v2.s, v3.s }[0], [x27], #12
+ld3  { v1.s, v2.s, v3.s }[0], [x27], x28
+ld3  { v1.d, v2.d, v3.d }[0], [x27], #24
+ld3  { v1.d, v2.d, v3.d }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G33
+ld3r  { v1.1d, v2.1d, v3.1d }, [x27], #24
+ld3r  { v1.2d, v2....
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/68854


More information about the llvm-commits mailing list