[llvm] [AArch64] Fix schedmodel pre/post-index loads and stores for TSV110 (PR #68854)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 11 23:37:37 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Allen (vfdff)
<details>
<summary>Changes</summary>
Similar to D159254, this fixes the order of WriteAdr operands on
post/pre-inc loads/stores in the TSV110 scheduling model.
---
Patch is 182.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/68854.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64SchedTSV110.td (+37-37)
- (added) llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-writeback.s (+3954)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
index af4a0176e44ee47..9e5060f1f364965 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
@@ -443,8 +443,8 @@ def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs LDRSWl)>;
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDR(BB|HH|W|X)ui$")>;
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1LdSt], (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1LdSt], (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDTR(B|H|W|X)i$")>;
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDUR(BB|HH|W|X)i$")>;
@@ -453,11 +453,11 @@ def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDNP(W|X)i$")>;
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDP(W|X)i$")>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt_1ALUAB, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1LdSt_1ALUAB, WriteLDHi],(instregex "^LDP(W|X)(post|pre)$")>;
def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instrs LDPSWi)>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpost)>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpre)>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1LdSt, WriteLDHi], (instrs LDPSWpost)>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1LdSt, WriteLDHi], (instrs LDPSWpre)>;
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFMl)>;
def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFUMi)>;
@@ -469,13 +469,13 @@ def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^PRFMro(W|X)$")>;
// -----------------------------------------------------------------------------
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STN?P(W|X)i$")>;
-def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>;
+def : InstRW<[WriteAdr, TSV110Wr_1cyc_1LdSt], (instregex "^STP(W|X)(post|pre)$")>;
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STUR(BB|HH|W|X)i$")>;
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STTR(B|H|W|X)i$")>;
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STR(BB|HH|W|X)ui$")>;
-def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
-def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
+def : InstRW<[WriteAdr, TSV110Wr_1cyc_1LdSt], (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
+def : InstRW<[WriteAdr, TSV110Wr_1cyc_1LdSt], (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
// FP Data Processing Instructions
@@ -524,11 +524,11 @@ def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FMOV[SD][ir]$")>;
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[DSQ]l")>;
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDUR[BDHSQ]i")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1LdSt], (instregex "^LDR[BDHSQ](post|pre)")>;
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[BDHSQ]ui")>;
def : InstRW<[TSV110Wr_6cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi], (instregex "^LDN?P[DQS]i")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi, WriteAdr], (instregex "^LDP[DQS](post|pre)")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1LdSt, WriteLDHi], (instregex "^LDP[DQS](post|pre)")>;
// FP Store Instructions
@@ -539,7 +539,7 @@ def : InstRW<[TSV110Wr_1cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ](
def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STR[BHSDQ]ui")>;
def : InstRW<[TSV110Wr_2cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ]ro[WX]")>;
def : InstRW<[TSV110Wr_2cyc_2LdSt], (instregex "^STN?P[SDQ]i")>;
-def : InstRW<[TSV110Wr_2cyc_2LdSt, WriteAdr], (instregex "^STP[SDQ](post|pre)")>;
+def : InstRW<[WriteAdr, TSV110Wr_2cyc_2LdSt], (instregex "^STP[SDQ](post|pre)")>;
// ASIMD Integer Instructions
@@ -704,70 +704,70 @@ def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[FU](RECP|RSQRT)(E|X)v")>;
// -----------------------------------------------------------------------------
def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_7cyc_1F_1LdSt], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_8cyc_3F_2LdSt], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "LD1i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_7cyc_1F_1LdSt], (instregex "LD1i(8|16|32|64)_POST$")>;
def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "LD2i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_7cyc_2F_1LdSt], (instregex "LD2i(8|16|32|64)_POST$")>;
def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "LD3i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_8cyc_3F_1LdSt], (instregex "LD3i(8|16|32|64)_POST$")>;
def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], (instregex "LD4i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_8cyc_3F_2LdSt], (instregex "LD4i(8|16|32|64)_POST$")>;
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1LdSt], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1LdSt], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_6cyc_3LdSt], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_6cyc_3LdSt, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_6cyc_3LdSt], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_6cyc_2LdSt], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_6cyc_2LdSt, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_6cyc_2LdSt], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_10cyc_4F_4LdSt], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_10cyc_4F_4LdSt, WriteAdr], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_10cyc_4F_4LdSt], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD Store Instructions
// -----------------------------------------------------------------------------
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "ST1i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_3cyc_1F], (instregex "ST1i(8|16|32|64)_POST$")>;
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "ST2i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1F], (instregex "ST2i(8|16|32|64)_POST$")>;
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "ST3i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1F], (instregex "ST3i(8|16|32|64)_POST$")>;
def : InstRW<[TSV110Wr_6cyc_1F], (instregex "ST4i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_6cyc_1F], (instregex "ST4i(8|16|32|64)_POST$")>;
def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_3cyc_1F], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1F], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1F], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_6cyc_1F], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_6cyc_1F], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_4cyc_1F], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_5cyc_1F], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[TSV110Wr_8cyc_1F], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_8cyc_1F, WriteAdr], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteAdr, TSV110Wr_8cyc_1F], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
} // SchedModel = TSV110Model
diff --git a/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-writeback.s b/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-writeback.s
new file mode 100644
index 000000000000000..2738f0b54c243ab
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-writeback.s
@@ -0,0 +1,3954 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=tsv110 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=1 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN G01
+ld1 { v1.1d }, [x27], #8
+ld1 { v1.2d }, [x27], #16
+ld1 { v1.2s }, [x27], #8
+ld1 { v1.4h }, [x27], #8
+ld1 { v1.4s }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G02
+ld1 { v1.8b }, [x27], #8
+ld1 { v1.8h }, [x27], #16
+ld1 { v1.16b }, [x27], #16
+ld1 { v1.1d }, [x27], x28
+ld1 { v1.2d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G03
+ld1 { v1.2s }, [x27], x28
+ld1 { v1.4h }, [x27], x28
+ld1 { v1.4s }, [x27], x28
+ld1 { v1.8b }, [x27], x28
+ld1 { v1.8h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G04
+ld1 { v1.16b }, [x27], x28
+ld1 { v1.1d, v2.1d }, [x27], #16
+ld1 { v1.2d, v2.2d }, [x27], #32
+ld1 { v1.2s, v2.2s }, [x27], #16
+ld1 { v1.4h, v2.4h }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G05
+ld1 { v1.4s, v2.4s }, [x27], #32
+ld1 { v1.8b, v2.8b }, [x27], #16
+ld1 { v1.8h, v2.8h }, [x27], #32
+ld1 { v1.16b, v2.16b }, [x27], #32
+ld1 { v1.1d, v2.1d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G06
+ld1 { v1.2d, v2.2d }, [x27], x28
+ld1 { v1.2s, v2.2s }, [x27], x28
+ld1 { v1.4h, v2.4h }, [x27], x28
+ld1 { v1.4s, v2.4s }, [x27], x28
+ld1 { v1.8b, v2.8b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G07
+ld1 { v1.8h, v2.8h }, [x27], x28
+ld1 { v1.16b, v2.16b }, [x27], x28
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G08
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G09
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G10
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G11
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G12
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G13
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+ld1 { v1.b }[0], [x27], #1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G14
+ld1 { v1.b }[8], [x27], #1
+ld1 { v1.b }[0], [x27], x28
+ld1 { v1.b }[8], [x27], x28
+ld1 { v1.h }[0], [x27], #2
+ld1 { v1.h }[4], [x27], #2
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G15
+ld1 { v1.h }[0], [x27], x28
+ld1 { v1.h }[4], [x27], x28
+ld1 { v1.s }[0], [x27], #4
+ld1 { v1.s }[0], [x27], x28
+ld1 { v1.d }[0], [x27], #8
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G16
+ld1 { v1.d }[0], [x27], x28
+ld1r { v1.1d }, [x27], #8
+ld1r { v1.2d }, [x27], #8
+ld1r { v1.2s }, [x27], #4
+ld1r { v1.4h }, [x27], #2
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G17
+ld1r { v1.4s }, [x27], #4
+ld1r { v1.8b }, [x27], #1
+ld1r { v1.8h }, [x27], #2
+ld1r { v1.16b }, [x27], #1
+ld1r { v1.1d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G18
+ld1r { v1.2d }, [x27], x28
+ld1r { v1.2s }, [x27], x28
+ld1r { v1.4h }, [x27], x28
+ld1r { v1.4s }, [x27], x28
+ld1r { v1.8b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G19
+ld1r { v1.8h }, [x27], x28
+ld1r { v1.16b }, [x27], x28
+ld2 { v1.2d, v2.2d }, [x27], #32
+ld2 { v1.2s, v2.2s }, [x27], #16
+ld2 { v1.4h, v2.4h }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G20
+ld2 { v1.4s, v2.4s }, [x27], #32
+ld2 { v1.8b, v2.8b }, [x27], #16
+ld2 { v1.8h, v2.8h }, [x27], #32
+ld2 { v1.16b, v2.16b }, [x27], #32
+ld2 { v1.2d, v2.2d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G21
+ld2 { v1.2s, v2.2s }, [x27], x28
+ld2 { v1.4h, v2.4h }, [x27], x28
+ld2 { v1.4s, v2.4s }, [x27], x28
+ld2 { v1.8b, v2.8b }, [x27], x28
+ld2 { v1.8h, v2.8h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G22
+ld2 { v1.16b, v2.16b }, [x27], x28
+ld2 { v1.b, v2.b }[0], [x27], #2
+ld2 { v1.b, v2.b }[8], [x27], #2
+ld2 { v1.b, v2.b }[0], [x27], x28
+ld2 { v1.b, v2.b }[8], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G23
+ld2 { v1.h, v2.h }[0], [x27], #4
+ld2 { v1.h, v2.h }[4], [x27], #4
+ld2 { v1.h, v2.h }[0], [x27], x28
+ld2 { v1.h, v2.h }[4], [x27], x28
+ld2 { v1.s, v2.s }[0], [x27], #8
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G24
+ld2 { v1.s, v2.s }[0], [x27], x28
+ld2 { v1.d, v2.d }[0], [x27], #16
+ld2 { v1.d, v2.d }[0], [x27], x28
+ld2r { v1.1d, v2.1d }, [x27], #16
+ld2r { v1.2d, v2.2d }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G25
+ld2r { v1.2s, v2.2s }, [x27], #8
+ld2r { v1.4h, v2.4h }, [x27], #4
+ld2r { v1.4s, v2.4s }, [x27], #8
+ld2r { v1.8b, v2.8b }, [x27], #2
+ld2r { v1.8h, v2.8h }, [x27], #4
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G26
+ld2r { v1.16b, v2.16b }, [x27], #2
+ld2r { v1.1d, v2.1d }, [x27], x28
+ld2r { v1.2d, v2.2d }, [x27], x28
+ld2r { v1.2s, v2.2s }, [x27], x28
+ld2r { v1.4h, v2.4h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G27
+ld2r { v1.4s, v2.4s }, [x27], x28
+ld2r { v1.8b, v2.8b }, [x27], x28
+ld2r { v1.8h, v2.8h }, [x27], x28
+ld2r { v1.16b, v2.16b }, [x27], x28
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G28
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G29
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G30
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G31
+ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G32
+ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G33
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+ld3r { v1.2d, v2....
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/68854
More information about the llvm-commits
mailing list