[llvm] d9d9be6 - [AArch64] Update SVE scheduling of some CPUs
Harvin Iriawan via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 4 02:42:06 PDT 2023
Author: Harvin Iriawan
Date: 2023-07-04T10:41:56+01:00
New Revision: d9d9be63a52dc6e908dba8f87d44192ee47ac5f8
URL: https://github.com/llvm/llvm-project/commit/d9d9be63a52dc6e908dba8f87d44192ee47ac5f8
DIFF: https://github.com/llvm/llvm-project/commit/d9d9be63a52dc6e908dba8f87d44192ee47ac5f8.diff
LOG: [AArch64] Update SVE scheduling of some CPUs
* Update cortex-a510 and neoverse-v2 SVE scheduling so that pseudos
have the same instruction latency as original instruction.
Differential Revision: https://reviews.llvm.org/D154084
Added:
llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp
Modified:
llvm/lib/Target/AArch64/AArch64SchedA510.td
llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
llvm/unittests/Target/AArch64/CMakeLists.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA510.td b/llvm/lib/Target/AArch64/AArch64SchedA510.td
index 85e73b2fff6bc2..2526fe3041909e 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA510.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA510.td
@@ -554,196 +554,200 @@ def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs BRKPAS_PPzPP, BR
// Loop control, based on GPR
def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
- (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;
+ (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>;
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>;
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>;
// Loop terminate
-def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;
+def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instregex "^CTERM(EQ|NE)_(WW|XX)")>;
// Predicate counting scalar
def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
def : InstRW<[CortexA510Write<1, CortexA510UnitALU>],
- (instregex "^CNT[BHWD]_XPiI$")>;
+ (instregex "^CNT[BHWD]_XPiI")>;
def : InstRW<[CortexA510Write<1, CortexA510UnitALU>],
- (instregex "^(INC|DEC)[BHWD]_XPiI$")>;
+ (instregex "^(INC|DEC)[BHWD]_XPiI")>;
def : InstRW<[CortexA510Write<1, CortexA510UnitALU>],
- (instregex "^(SQINC|SQDEC|UQINC|UQDEC)[BHWD]_[XW]Pi(Wd)?I$")>;
+ (instregex "^(SQINC|SQDEC|UQINC|UQDEC)[BHWD]_[XW]Pi(Wd)?I")>;
// Predicate counting scalar, active predicate
def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
- (instregex "^CNTP_XPP_[BHSD]$")>;
+ (instregex "^CNTP_XPP_[BHSD]")>;
def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
- (instregex "^(DEC|INC)P_XP_[BHSD]$")>;
+ (instregex "^(DEC|INC)P_XP_[BHSD]")>;
def : InstRW<[CortexA510Write<8, CortexA510UnitVALU0>],
- (instregex "^(SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$",
- "^(UQDEC|UQINC)P_WP_[BHSD]$",
- "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]$")>;
+ (instregex "^(SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]",
+ "^(UQDEC|UQINC)P_WP_[BHSD]",
+ "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]")>;
// Predicate counting vector, active predicate
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
- (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>;
+ (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>;
// Predicate logical
def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
- (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;
+ (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>;
// Predicate logical, flag setting
def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
- (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>;
+ (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>;
// Predicate reverse
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^REV_PP_[BHSD]$")>;
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^REV_PP_[BHSD]")>;
// Predicate select
def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs SEL_PPPP)>;
// Predicate set
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PFALSE$", "^PTRUE_[BHSD]$")>;
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PFALSE", "^PTRUE_[BHSD]")>;
// Predicate set/initialize, set flags
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PTRUES_[BHSD]$")>;
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PTRUES_[BHSD]")>;
// Predicate find first/next
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>;
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>;
// Predicate test
def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs PTEST_PP)>;
// Predicate transpose
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^TRN[12]_PPP_[BHSDQ]$")>;
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^TRN[12]_PPP_[BHSDQ]")>;
// Predicate unpack and widen
def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs PUNPKHI_PP, PUNPKLO_PP)>;
// Predicate zip/unzip
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>;
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]")>;
// SVE integer instructions
// -----------------------------------------------------------------------------
// Arithmetic, absolute diff
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]ABD_ZPmZ_[BHSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]ABD_(ZPmZ|ZPZZ)_[BHSD]")>;
// Arithmetic, absolute diff accum
-def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>;
+def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^[SU]ABA_ZZZ_[BHSD]")>;
// Arithmetic, absolute diff accum long
-def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>;
+def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>;
// Arithmetic, absolute diff long
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>;
// Arithmetic, basic
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>],
- (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]$",
- "^(ADD|SUB|SUBR)_ZPmZ_[BHSD]$",
- "^(ADD|SUB)_ZZZ_[BHSD]$",
- "^(ADD|SUB|SUBR)_ZI_[BHSD]$",
- "^ADR_[SU]XTW_ZZZ_D_[0123]$",
- "^ADR_LSL_ZZZ_[SD]_[0123]$",
- "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]$",
- "^SADDLBT_ZZZ_[HSD]$",
- "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]$",
- "^SSUBL(BT|TB)_ZZZ_[HSD]$")>;
+ (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]",
+ "^(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]",
+ "^(ADD|SUB)_ZZZ_[BHSD]",
+ "^(ADD|SUB|SUBR)_ZI_[BHSD]",
+ "^ADR_[SU]XTW_ZZZ_D_[0123]",
+ "^ADR_LSL_ZZZ_[SD]_[0123]",
+ "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
+ "^SADDLBT_ZZZ_[HSD]",
+ "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^SSUBL(BT|TB)_ZZZ_[HSD]")>;
// Arithmetic, complex
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
- (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]$",
- "^SQ(ABS|NEG)_ZPmZ_[BHSD]$",
- "^SQ(ADD|SUB|SUBR)_ZPmZ_?[BHSD]$",
- "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]$",
- "^[SU]Q(ADD|SUB)_ZI_[BHSD]$",
- "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]$",
- "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]$")>;
+ (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
+ "^SQ(ABS|NEG)_ZPmZ_[BHSD]",
+ "^SQ(ADD|SUB|SUBR)_ZPmZ_?[BHSD]",
+ "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
+ "^[SU]Q(ADD|SUB)_ZI_[BHSD]",
+ "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
+ "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;
// Arithmetic, large integer
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>;
// Arithmetic, pairwise add
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^ADDP_ZPmZ_[BHSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^ADDP_ZPmZ_[BHSD]")>;
// Arithmetic, pairwise add and accum long
-def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>;
+def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "^[SU]ADALP_ZPmZ_[HSD]")>;
// Arithmetic, shift
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>],
- (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]$",
- "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]$",
- "^(ASR|LSL|LSR)_ZPmI_[BHSD]$",
- "^(ASR|LSL|LSR)_ZPmZ_[BHSD]$",
- "^(ASR|LSL|LSR)_ZZI_[BHSD]$",
- "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]$")>;
+ (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]",
+ "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]",
+ "^(ASR|LSL|LSR)_ZPmI_[BHSD]",
+ "^(ASR|LSL|LSR)_ZPZI_[BHSD]",
+ "^(ASR|LSL|LSR)_ZPmZ_[BHSD]",
+ "^(ASR|LSL|LSR)_ZPZZ_[BHSD]",
+ "^(ASR|LSL|LSR)_ZZI_[BHSD]",
+ "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
+// Arithmetic, shift right for divide
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
+ (instregex "^ASRD_ZPmI_[BHSD]",
+ "^ASRD_ZPZI_[BHSD]")>;
// Arithmetic, shift and accumulate
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
- (instregex "^(SSRA|USRA)_ZZI_[BHSD]$")>;
+ (instregex "^(SSRA|USRA)_ZZI_[BHSD]")>;
def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>],
- (instregex "^(SRSRA|URSRA)_ZZI_[BHSD]$")>;
+ (instregex "^(SRSRA|URSRA)_ZZI_[BHSD]")>;
// Arithmetic, shift by immediate
// Arithmetic, shift by immediate and insert
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>],
- (instregex "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]$")>;
+ (instregex "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]")>;
// Arithmetic, shift complex
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
- (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]$",
- "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]$",
- "^(SQSHL|SQSHLU|UQSHL)_ZPmI_[BHSD]$",
- "^SQSHRU?N[BT]_ZZI_[BHS]$",
- "^UQR?SHRN[BT]_ZZI_[BHS]$")>;
-
-// Arithmetic, shift right for divide
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^ASRD_ZPmI_[BHSD]$")>;
+ (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]",
+ "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_(ZPmZ|ZPZZ)_[BHSD]",
+ "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]",
+ "^SQSHRU?N[BT]_ZZI_[BHS]",
+ "^UQR?SHRN[BT]_ZZI_[BHS]")>;
// Arithmetic, shift rounding
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
- (instregex "^(SRSHL|SRSHLR|URSHL|URSHLR)_ZPmZ_[BHSD]$",
- "^[SU]RSHR_ZPmI_[BHSD]$")>;
+ (instregex "^(SRSHL|SRSHR|SRSHLR|URSHL|URSHLR|URSHR)_(ZPmZ|ZPZZ|ZPZI)_[BHSD]",
+ "^[SU]RSHR_ZPmI_[BHSD]")>;
// Bit manipulation
def : InstRW<[CortexA510MCWrite<14, 13, CortexA510UnitVMC>],
- (instregex "^(BDEP|BEXT|BGRP)_ZZZ_B$")>;
+ (instregex "^(BDEP|BEXT|BGRP)_ZZZ_B")>;
def : InstRW<[CortexA510MCWrite<22, 21, CortexA510UnitVMC>],
- (instregex "^(BDEP|BEXT|BGRP)_ZZZ_H$")>;
+ (instregex "^(BDEP|BEXT|BGRP)_ZZZ_H")>;
def : InstRW<[CortexA510MCWrite<38, 37, CortexA510UnitVMC>],
- (instregex "^(BDEP|BEXT|BGRP)_ZZZ_S$")>;
+ (instregex "^(BDEP|BEXT|BGRP)_ZZZ_S")>;
def : InstRW<[CortexA510MCWrite<70, 69, CortexA510UnitVMC>],
- (instregex "^(BDEP|BEXT|BGRP)_ZZZ_D$")>;
+ (instregex "^(BDEP|BEXT|BGRP)_ZZZ_D")>;
// Bitwise select
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>;
// Count/reverse bits
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^(CLS|CLZ|RBIT)_ZPmZ_[BHSD]$")>;
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_[BH]$")>;
-def : InstRW<[CortexA510Write<8, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_S$")>;
-def : InstRW<[CortexA510Write<12, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_D$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^(CLS|CLZ|RBIT)_ZPmZ_[BHSD]")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_[BH]")>;
+def : InstRW<[CortexA510Write<8, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_S")>;
+def : InstRW<[CortexA510Write<12, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_D")>;
// Broadcast logical bitmask immediate to vector
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instrs DUPM_ZI)>;
// Compare and set flags
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>],
- (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
- "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;
+ (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]",
+ "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>;
// Complex add
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^CADD_ZZI_[BHSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^CADD_ZZI_[BHSD]")>;
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^SQCADD_ZZI_[BHSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^SQCADD_ZZI_[BHSD]")>;
// Complex dot product 8-bit element
def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
@@ -752,19 +756,19 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs CDOT_ZZZ_S, CDOT_
def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;
// Complex multiply-add B, H, S element size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^CMLA_ZZZ_[BHS]$",
- "^CMLA_ZZZI_[HS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^CMLA_ZZZ_[BHS]",
+ "^CMLA_ZZZI_[HS]")>;
// Complex multiply-add D element size
def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs CMLA_ZZZ_D)>;
// Conditional extract operations, scalar form
-def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;
+def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^CLAST[AB]_RPZ_[BHSD]")>;
// Conditional extract operations, SIMD&FP scalar and vector forms
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
- "^COMPACT_ZPZ_[SD]$",
- "^SPLICE_ZPZZ?_[BHSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]",
+ "^COMPACT_ZPZ_[SD]",
+ "^SPLICE_ZPZZ?_[BHSD]")>;
// Convert to floating point, 64b to float or convert to double
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]CVTF_ZPmZ_Dto[SD]")>;
@@ -782,165 +786,166 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]CVTF_ZPm
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
// Copy, scalar
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU0>],(instregex "^CPY_ZPmR_[BHSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU0>],(instregex "^CPY_ZPmR_[BHSD]")>;
// Copy, scalar SIMD&FP or imm
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^CPY_ZPm[IV]_[BHSD]$",
- "^CPY_ZPzI_[BHSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^CPY_ZPm[IV]_[BHSD]",
+ "^CPY_ZPzI_[BHSD]")>;
// Divides, 32 bit
-def : InstRW<[CortexA510MCWrite<15, 12, CortexA510UnitVMC>], (instregex "^[SU]DIVR?_ZPmZ_S$")>;
+def : InstRW<[CortexA510MCWrite<15, 12, CortexA510UnitVMC>], (instregex "^[SU]DIVR?_(ZPmZ|ZPZZ)_S")>;
// Divides, 64 bit
-def : InstRW<[CortexA510MCWrite<26, 23, CortexA510UnitVMC>], (instregex "^[SU]DIVR?_ZPmZ_D$")>;
+def : InstRW<[CortexA510MCWrite<26, 23, CortexA510UnitVMC>], (instregex "^[SU]DIVR?_(ZPmZ|ZPZZ)_D")>;
// Dot product, 8 bit
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_S$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_S")>;
// Dot product, 8 bit, using signed and unsigned integers
def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
// Dot product, 16 bit
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_D$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_D")>;
// Duplicate, immediate and indexed form
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^DUP_ZI_[BHSD]$",
- "^DUP_ZZI_[BHSDQ]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^DUP_ZI_[BHSD]",
+ "^DUP_ZZI_[BHSDQ]")>;
// Duplicate, scalar form
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^DUP_ZR_[BHSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^DUP_ZR_[BHSD]")>;
// Extend, sign or zero
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]XTB_ZPmZ_[HSD]$",
- "^[SU]XTH_ZPmZ_[SD]$",
- "^[SU]XTW_ZPmZ_[D]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]XTB_ZPmZ_[HSD]",
+ "^[SU]XTH_ZPmZ_[SD]",
+ "^[SU]XTW_ZPmZ_[D]")>;
// Extract
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instrs EXT_ZZI, EXT_ZZI_B)>;
// Extract narrow saturating
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]$",
- "^SQXTUN[BT]_ZZ_[BHS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]",
+ "^SQXTUN[BT]_ZZ_[BHS]")>;
// Extract/insert operation, SIMD and FP scalar form
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^LAST[AB]_VPZ_[BHSD]$",
- "^INSR_ZV_[BHSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^LAST[AB]_VPZ_[BHSD]",
+ "^INSR_ZV_[BHSD]")>;
// Extract/insert operation, scalar
-def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU0>], (instregex "^LAST[AB]_RPZ_[BHSD]$",
- "^INSR_ZR_[BHSD]$")>;
+def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU0>], (instregex "^LAST[AB]_RPZ_[BHSD]",
+ "^INSR_ZR_[BHSD]")>;
// Histogram operations
-def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU0>], (instregex "^HISTCNT_ZPzZZ_[SD]$",
- "^HISTSEG_ZZZ$")>;
+def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU0>], (instregex "^HISTCNT_ZPzZZ_[SD]",
+ "^HISTSEG_ZZZ")>;
// Horizontal operations, B, H, S form, immediate operands only
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_II_[BHS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_II_[BHS]")>;
// Horizontal operations, B, H, S form, scalar, immediate operands/ scalar
// operands only / immediate, scalar operands
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>;
// Horizontal operations, D form, immediate operands only
def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs INDEX_II_D)>;
// Horizontal operations, D form, scalar, immediate operands)/ scalar operands
// only / immediate, scalar operands
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_(IR|RI|RR)_D$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_(IR|RI|RR)_D")>;
// Logical
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>],
- (instregex "^(AND|EOR|ORR)_ZI$",
- "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$",
- "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>;
+ (instregex "^(AND|EOR|ORR)_ZI",
+ "^(AND|BIC|EOR|EOR|ORR)_ZZZ",
+ "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]",
+ "^(AND|BIC|EOR|NOT|ORR)_ZPZZ_[BHSD]")>;
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
- (instregex "^EOR(BT|TB)_ZZZ_[BHSD]$")>;
+ (instregex "^EOR(BT|TB)_ZZZ_[BHSD]")>;
// Max/min, basic and pairwise
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]$",
- "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
+ "^[SU](MAX|MIN)P?_(ZPmZ|ZPZZ)_[BHSD]")>;
// Matching operations
-def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "^N?MATCH_PPzZZ_[BH]$")>;
+def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "^N?MATCH_PPzZZ_[BH]")>;
// Matrix multiply-accumulate
def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
// Move prefix
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
- "^MOVPRFX_ZZ$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]",
+ "^MOVPRFX_ZZ")>;
// Multiply, B, H, S element size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]$",
- "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ|ZPZZ)_[BHS]",
+ "^[SU]MULH_(ZPmZ|ZZZ|ZPZZ)_[BHS]")>;
// Multiply, D element size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D$",
- "^[SU]MULH_(ZPmZ|ZZZ)_D$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ|ZPZZ)_D",
+ "^[SU]MULH_(ZPmZ|ZZZ|ZPZZ)_D")>;
// Multiply long
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]MULL[BT]_ZZZI_[SD]$",
- "^[SU]MULL[BT]_ZZZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]MULL[BT]_ZZZI_[SD]",
+ "^[SU]MULL[BT]_ZZZ_[HSD]")>;
// Multiply accumulate, B, H, S element size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^ML[AS]_ZZZI_[BHS]$",
- "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^ML[AS]_(ZZZI|ZPZZZ)_[BHS]",
+ "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;
// Multiply accumulate, D element size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^ML[AS]_ZZZI_D$",
- "^(ML[AS]|MAD|MSB)_ZPmZZ_D$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^ML[AS]_(ZZZI|ZPZZZ)_D",
+ "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>;
// Multiply accumulate long
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$",
- "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]",
+ "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>;
// Multiply accumulate saturating doubling long regular
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]$",
- "^SQDML[AS](LB|LT)_ZZZI_[SD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]",
+ "^SQDML[AS](LB|LT)_ZZZI_[SD]")>;
// Multiply saturating doubling high, B, H, S element size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDMULH_ZZZ_[BHS]$",
- "^SQDMULH_ZZZI_[HS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDMULH_ZZZ_[BHS]",
+ "^SQDMULH_ZZZI_[HS]")>;
// Multiply saturating doubling high, D element size
def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;
// Multiply saturating doubling long
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDMULL[BT]_ZZZ_[HSD]$",
- "^SQDMULL[BT]_ZZZI_[SD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDMULL[BT]_ZZZ_[HSD]",
+ "^SQDMULL[BT]_ZZZI_[SD]")>;
// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
// element size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDML[AS]H_ZZZ_[BHS]$",
- "^SQRDCMLAH_ZZZ_[BHS]$",
- "^SQRDML[AS]H_ZZZI_[HS]$",
- "^SQRDCMLAH_ZZZI_[HS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDML[AS]H_ZZZ_[BHS]",
+ "^SQRDCMLAH_ZZZ_[BHS]",
+ "^SQRDML[AS]H_ZZZI_[HS]",
+ "^SQRDCMLAH_ZZZI_[HS]")>;
// Multiply saturating rounding doubling regular/complex accumulate, D element
// size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDML[AS]H_ZZZI?_D$",
- "^SQRDCMLAH_ZZZ_D$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDML[AS]H_ZZZI?_D",
+ "^SQRDCMLAH_ZZZ_D")>;
// Multiply saturating rounding doubling regular/complex, B, H, S element size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDMULH_ZZZ_[BHS]$",
- "^SQRDMULH_ZZZI_[HS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDMULH_ZZZ_[BHS]",
+ "^SQRDMULH_ZZZI_[HS]")>;
// Multiply saturating rounding doubling regular/complex, D element size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDMULH_ZZZI?_D$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDMULH_ZZZI?_D")>;
// Multiply/multiply long, (8x8) polynomial
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^PMUL_ZZZ_B$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^PMUL_ZZZ_B")>;
-def : InstRW<[CortexA510Write<6, CortexA510UnitVMC>], (instregex "^PMULL[BT]_ZZZ_[HDQ]$")>;
+def : InstRW<[CortexA510Write<6, CortexA510UnitVMC>], (instregex "^PMULL[BT]_ZZZ_[HDQ]")>;
// Predicate counting vector
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
- (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI$")>;
+ (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI")>;
// Reciprocal estimate
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs URECPE_ZPmZ_S, URSQRTE_ZPmZ_S)>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>;
// Reduction, arithmetic, B form
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
@@ -955,43 +960,44 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^[SU](ADD|MA
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
// Reduction, logical
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]")>;
// Reverse, vector
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^REV_ZZ_[BHSD]$",
- "^REVB_ZPmZ_[HSD]$",
- "^REVH_ZPmZ_[SD]$",
- "^REVW_ZPmZ_D$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^REV_ZZ_[BHSD]",
+ "^REVB_ZPmZ_[HSD]",
+ "^REVH_ZPmZ_[SD]",
+ "^REVW_ZPmZ_D")>;
// Select, vector form
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^SEL_ZPZZ_[BHSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^SEL_ZPZZ_[BHSD]")>;
// Table lookup
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TBL_ZZZZ?_[BHSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TBL_ZZZZ?_[BHSD]")>;
// Table lookup extension
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TBX_ZZZ_[BHSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TBX_ZZZ_[BHSD]")>;
// Transpose, vector form
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>;
// Unpack and extend
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>;
// Zip/unzip
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>;
// SVE floating-point instructions
// -----------------------------------------------------------------------------
// Floating point absolute value/difference
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FAB[SD]_ZPmZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FAB[SD]_ZPmZ_[HSD]",
+ "^FAB[SD]_ZPZZ_[HSD]")>;
// Floating point arithmetic
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$",
- "^FADDP_ZPmZZ_[HSD]$",
- "^FNEG_ZPmZ_[HSD]$",
- "^FSUBR_ZPm[IZ]_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ|ZPZI|ZPZZ)_[HSD]",
+ "^FADDP_ZPmZZ_[HSD]",
+ "^FNEG_ZPmZ_[HSD]",
+ "^FSUBR_(ZPm[IZ]|ZPZ[IZ])_[HSD]")>;
// Floating point associative add, F16
def : InstRW<[CortexA510MCWrite<32, 29, CortexA510UnitVALU>], (instrs FADDA_VPZ_H)>;
@@ -1003,17 +1009,17 @@ def : InstRW<[CortexA510MCWrite<16, 13, CortexA510UnitVALU>], (instrs FADDA_VPZ_
def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVALU>], (instrs FADDA_VPZ_D)>;
// Floating point compare
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FACG[ET]_PPzZZ_[HSD]$",
- "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$",
- "^FCM(LE|LT)_PPzZ0_[HSD]$",
- "^FCMUO_PPzZZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FACG[ET]_PPzZZ_[HSD]",
+ "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]",
+ "^FCM(LE|LT)_PPzZ0_[HSD]",
+ "^FCMUO_PPzZZ_[HSD]")>;
// Floating point complex add
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCADD_ZPmZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCADD_ZPmZ_[HSD]")>;
// Floating point complex multiply add
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FCMLA_ZPmZZ_[HSD]$",
- "^FCMLA_ZZZI_[HS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FCMLA_ZPmZZ_[HSD]",
+ "^FCMLA_ZZZI_[HS]")>;
// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
@@ -1030,13 +1036,13 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCVT_ZPmZ_(H
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCVTX_ZPmZ_DtoS", "FCVTXNT_ZPmZ_DtoS")>;
// Floating point base2 log, F16
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs FLOGB_ZPmZ_H)>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>;
// Floating point base2 log, F32
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs FLOGB_ZPmZ_S)>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>;
// Floating point base2 log, F64
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs FLOGB_ZPmZ_D)>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>;
// Floating point convert to integer, F16
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;
@@ -1049,93 +1055,92 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
(instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;
// Floating point copy
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU0>], (instregex "^FCPY_ZPmI_[HSD]$",
- "^FDUP_ZI_[HSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU0>], (instregex "^FCPY_ZPmI_[HSD]",
+ "^FDUP_ZI_[HSD]")>;
// Floating point divide, F16
-def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVMC>], (instregex "^FDIVR?_ZPmZ_H$")>;
+def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVMC>], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
// Floating point divide, F32
-def : InstRW<[CortexA510MCWrite<13, 10, CortexA510UnitVMC>], (instregex "^FDIVR?_ZPmZ_S$")>;
+def : InstRW<[CortexA510MCWrite<13, 10, CortexA510UnitVMC>], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
// Floating point divide, F64
-def : InstRW<[CortexA510MCWrite<22, 19, CortexA510UnitVMC>], (instregex "^FDIVR?_ZPmZ_D$")>;
+def : InstRW<[CortexA510MCWrite<22, 19, CortexA510UnitVMC>], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
// Floating point min/max pairwise
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;
// Floating point min/max
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(MAX|MIN)(NM)?_(ZPm[IZ]|ZPZZ|ZPZI)_[HSD]")>;
// Floating point multiply
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]$",
- "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^(FSCALE|FMULX)_(ZPmZ|ZPZZ)_[HSD]",
+ "^FMUL_(ZPm[IZ]|ZZZI?|ZPZI|ZPZZ)_[HSD]")>;
// Floating point multiply accumulate
def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>],
- (instregex "^FML[AS]_(ZPmZZ|ZZZI)_[HSD]$",
- "^(FMAD|FNMAD|FNML[AS]|FN?MSB)_ZPmZZ_[HSD]$")>;
+ (instregex "^FML[AS]_(ZPmZZ|ZZZI|ZPZZZ)_[HSD]",
+ "^(FMAD|FNMAD|FNML[AS]|FN?MSB)_(ZPmZZ|ZPZZZ)_[HSD]")>;
// Floating point multiply add/sub accumulate long
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>;
// Floating point reciprocal estimate, F16
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs FRECPE_ZZ_H, FRECPX_ZPmZ_H,
- FRSQRTE_ZZ_H)>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FRECPE_ZZ_H", "^FRECPX_ZPmZ_H",
+ "^FRSQRTE_ZZ_H")>;
// Floating point reciprocal estimate, F32
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs FRECPE_ZZ_S, FRECPX_ZPmZ_S,
- FRSQRTE_ZZ_S)>;
-
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FRECPE_ZZ_S", "^FRECPX_ZPmZ_S",
+ "^FRSQRTE_ZZ_S")>;
// Floating point reciprocal estimate, F64
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs FRECPE_ZZ_D, FRECPX_ZPmZ_D,
- FRSQRTE_ZZ_D)>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>],(instregex "^FRECPE_ZZ_D", "^FRECPX_ZPmZ_D",
+ "^FRSQRTE_ZZ_D")>;
// Floating point reciprocal step
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;
// Floating point reduction, F16
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>],
- (instregex "^(FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_[HSD]$")>;
+ (instregex "^(FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_[HSD]")>;
// Floating point reduction, F32
def : InstRW<[CortexA510MCWrite<12, 11, CortexA510UnitVALU0>],
- (instregex "^FADDV_VPZ_H$")>;
+ (instregex "^FADDV_VPZ_H")>;
def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVALU0>],
- (instregex "^FADDV_VPZ_S$")>;
+ (instregex "^FADDV_VPZ_S")>;
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>],
- (instregex "^FADDV_VPZ_D$")>;
+ (instregex "^FADDV_VPZ_D")>;
// Floating point round to integral, F16
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
// Floating point round to integral, F32
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
// Floating point round to integral, F64
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
// Floating point square root, F16
-def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVMC>], (instrs FSQRT_ZPmZ_H)>;
+def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVMC>], (instregex "^FSQRT_ZPmZ_H")>;
// Floating point square root, F32
-def : InstRW<[CortexA510MCWrite<12, 9, CortexA510UnitVMC>], (instrs FSQRT_ZPmZ_S)>;
+def : InstRW<[CortexA510MCWrite<12, 9, CortexA510UnitVMC>], (instregex "^FSQRT_ZPmZ_S")>;
// Floating point square root, F64
-def : InstRW<[CortexA510MCWrite<22, 19, CortexA510UnitVMC>], (instrs FSQRT_ZPmZ_D)>;
+def : InstRW<[CortexA510MCWrite<22, 19, CortexA510UnitVMC>], (instregex "^FSQRT_ZPmZ_D")>;
// Floating point trigonometric exponentiation
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FEXPA_ZZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FEXPA_ZZ_[HSD]")>;
// Floating point trigonometric multiply add
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FTMAD_ZZI_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FTMAD_ZZI_[HSD]")>;
// Floating point trigonometric, miscellaneous
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FTSMUL_ZZZ_[HSD]$")>;
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FTSSEL_ZZZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FTSMUL_ZZZ_[HSD]")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FTSSEL_ZZZ_[HSD]")>;
// SVE BFloat16 (BF16) instructions
@@ -1151,7 +1156,7 @@ def : InstRW<[A510Write_10cyc_1VMAC_1VALU], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
def : InstRW<[A510Write_15cyc_1VMAC_1VALU], (instrs BFMMLA_ZZZ)>;
// Multiply accumulate long
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^BFMLAL[BT]_ZZZ(I)?")>;
// SVE Load instructions
// -----------------------------------------------------------------------------
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index 0901290bff3c1b..fd7be6ea61693b 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -1998,174 +1998,172 @@ def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
// Loop control, based on GPR
def : InstRW<[V2Write_3cyc_2M],
- (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;
-def : InstRW<[V2Write_3cyc_2M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>;
+ (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>;
+def : InstRW<[V2Write_3cyc_2M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>;
// Loop terminate
-def : InstRW<[V2Write_1cyc_2M], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;
+def : InstRW<[V2Write_1cyc_2M], (instregex "^CTERM(EQ|NE)_(WW|XX)")>;
// Predicate counting scalar
def : InstRW<[V2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
def : InstRW<[V2Write_2cyc_1M],
- (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI$",
- "^SQ(DEC|INC)[BHWD]_XPiWdI$",
- "^UQ(DEC|INC)[BHWD]_WPiI$")>;
+ (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI",
+ "^SQ(DEC|INC)[BHWD]_XPiWdI",
+ "^UQ(DEC|INC)[BHWD]_WPiI")>;
// Predicate counting scalar, ALL, {1,2,4}
-def : InstRW<[V2Write_IncDec], (instregex "^(DEC|INC)[BHWD]_XPiI$")>;
+def : InstRW<[V2Write_IncDec], (instregex "^(DEC|INC)[BHWD]_XPiI")>;
// Predicate counting scalar, active predicate
def : InstRW<[V2Write_2cyc_1M],
- (instregex "^CNTP_XPP_[BHSD]$",
- "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$",
- "^(UQDEC|UQINC)P_WP_[BHSD]$",
- "^(SQDEC|SQINC)P_XPWd_[BHSD]$")>;
+ (instregex "^CNTP_XPP_[BHSD]",
+ "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]",
+ "^(UQDEC|UQINC)P_WP_[BHSD]",
+ "^(SQDEC|SQINC)P_XPWd_[BHSD]")>;
// Predicate counting vector, active predicate
def : InstRW<[V2Write_7cyc_1M_1M0_1V],
- (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>;
+ (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>;
// Predicate logical
def : InstRW<[V2Write_1or2cyc_1M0],
- (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;
+ (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>;
// Predicate logical, flag setting
def : InstRW<[V2Write_1or2cyc_1M0_1M],
- (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>;
+ (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>;
// Predicate reverse
-def : InstRW<[V2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]")>;
// Predicate select
def : InstRW<[V2Write_1cyc_1M0], (instrs SEL_PPPP)>;
// Predicate set
-def : InstRW<[V2Write_2cyc_1M], (instregex "^PFALSE$", "^PTRUE_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1M], (instregex "^PFALSE", "^PTRUE_[BHSD]")>;
// Predicate set/initialize, set flags
-def : InstRW<[V2Write_3cyc_2M], (instregex "^PTRUES_[BHSD]$")>;
+def : InstRW<[V2Write_3cyc_2M], (instregex "^PTRUES_[BHSD]")>;
// Predicate find first/next
-def : InstRW<[V2Write_2cyc_1M], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1M], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>;
// Predicate test
def : InstRW<[V2Write_1cyc_1M], (instrs PTEST_PP)>;
// Predicate transpose
-def : InstRW<[V2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSD]")>;
// Predicate unpack and widen
def : InstRW<[V2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>;
// Predicate zip/unzip
-def : InstRW<[V2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]")>;
// SVE integer instructions
// -----------------------------------------------------------------------------
// Arithmetic, absolute diff
-def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]$",
- "^[SU]ABD_ZPZZ_[BHSD]_UNDEF$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]",
+ "^[SU]ABD_ZPZZ_[BHSD]")>;
// Arithmetic, absolute diff accum
-def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>;
+def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABA_ZZZ_[BHSD]")>;
// Arithmetic, absolute diff accum long
-def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>;
+def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>;
// Arithmetic, absolute diff long
-def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>;
// Arithmetic, basic
def : InstRW<[V2Write_2cyc_1V],
- (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
- "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]_UNDEF$",
- "^(ADD|SUB)_ZZZ_[BHSD]$",
- "^(ADD|SUB|SUBR)_ZI_[BHSD]$",
- "^ADR_[SU]XTW_ZZZ_D_[0123]$",
- "^ADR_LSL_ZZZ_[SD]_[0123]$",
- "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]$",
- "^SADDLBT_ZZZ_[HSD]$",
- "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]$",
- "^SSUBL(BT|TB)_ZZZ_[HSD]$")>;
+ (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^(ADD|SUB)_ZZZ_[BHSD]",
+ "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]",
+ "^(ADD|SUB|SUBR)_ZI_[BHSD]",
+ "^ADR_[SU]XTW_ZZZ_D_[0123]",
+ "^ADR_LSL_ZZZ_[SD]_[0123]",
+ "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
+ "^SADDLBT_ZZZ_[HSD]",
+ "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^SSUBL(BT|TB)_ZZZ_[HSD]")>;
// Arithmetic, complex
def : InstRW<[V2Write_2cyc_1V],
- (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]$",
- "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
- "^SQ(ABS|NEG)_ZPmZ_[BHSD]_UNDEF$",
- "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]$",
- "^[SU]Q(ADD|SUB)_ZI_[BHSD]$",
- "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]$",
- "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]$")>;
+ (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
+ "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
+ "^[SU]Q(ADD|SUB)_ZI_[BHSD]",
+ "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
+ "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;
// Arithmetic, large integer
-def : InstRW<[V2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>;
// Arithmetic, pairwise add
-def : InstRW<[V2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]")>;
// Arithmetic, pairwise add and accum long
def : InstRW<[V2Wr_ZPA, ReadDefault, V2Rd_ZPA],
- (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>;
+ (instregex "^[SU]ADALP_ZPmZ_[HSD]")>;
// Arithmetic, shift
def : InstRW<[V2Write_2cyc_1V13],
- (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]$",
- "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]$",
- "^(ASR|LSL|LSR)_ZPmI_[BHSD]$",
- "^(ASR|LSL|LSR)_ZPmZ_[BHSD]$",
- "^(ASR|LSL|LSR)_ZZI_[BHSD]$",
- "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]_UNDEF$",
- "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]$")>;
+ (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]",
+ "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]",
+ "^(ASR|LSL|LSR)_ZPmI_[BHSD]",
+ "^(ASR|LSL|LSR)_ZPmZ_[BHSD]",
+ "^(ASR|LSL|LSR)_ZZI_[BHSD]",
+ "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
+ "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
// Arithmetic, shift and accumulate
-def : InstRW<[V2Wr_ZSA, V2Rd_ZSA], (instregex "^[SU]R?SRA_ZZI_[BHSD]$")>;
+def : InstRW<[V2Wr_ZSA, V2Rd_ZSA], (instregex "^[SU]R?SRA_ZZI_[BHSD]")>;
// Arithmetic, shift by immediate
-def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHRN[BT]_ZZI_[BHS]$",
- "^[SU]SHLL[BT]_ZZI_[HSD]$")>;
+def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHRN[BT]_ZZI_[BHS]",
+ "^[SU]SHLL[BT]_ZZI_[HSD]")>;
// Arithmetic, shift by immediate and insert
-def : InstRW<[V2Write_2cyc_1V13], (instregex "^(SLI|SRI)_ZZI_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1V13], (instregex "^(SLI|SRI)_ZZI_[BHSD]")>;
// Arithmetic, shift complex
def : InstRW<[V2Write_4cyc_1V13],
- (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]$",
- "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]$",
- "^[SU]QR?SHL_ZPZZ_[BHSD]_UNDEF$",
- "^(SQSHL|SQSHLU|UQSHL)_ZPmI_[BHSD]$",
- "^SQSHRU?N[BT]_ZZI_[BHS]$",
- "^UQR?SHRN[BT]_ZZI_[BHS]$")>;
+ (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]",
+ "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]",
+ "^[SU]QR?SHL_ZPZZ_[BHSD]",
+ "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]",
+ "^SQSHRU?N[BT]_ZZI_[BHS]",
+ "^UQR?SHRN[BT]_ZZI_[BHS]")>;
// Arithmetic, shift right for divide
-def : InstRW<[V2Write_4cyc_1V13], (instregex "^ASRD_ZPmI_[BHSD]$")>;
+def : InstRW<[V2Write_4cyc_1V13], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;
// Arithmetic, shift rounding
-def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]$",
- "^[SU]RSHL_ZPZZ_[BHSD]_UNDEF$",
- "^[SU]RSHR_ZPmI_[BHSD]$")>;
+def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]",
+ "^[SU]RSHL_ZPZZ_[BHSD]",
+ "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>;
// Bit manipulation
-def : InstRW<[V2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]$")>;
+def : InstRW<[V2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>;
// Bitwise select
-def : InstRW<[V2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>;
// Count/reverse bits
-def : InstRW<[V2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$",
- "^(CLS|CLZ|CNT)_ZPmZ_[BHSD]_UNDEF$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;
// Broadcast logical bitmask immediate to vector
def : InstRW<[V2Write_2cyc_1V], (instrs DUPM_ZI)>;
// Compare and set flags
def : InstRW<[V2Write_4or5cyc_1V0_1M0],
- (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
- "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;
+ (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]",
+ "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>;
// Complex add
-def : InstRW<[V2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]")>;
// Complex dot product 8-bit element
def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
@@ -2174,201 +2172,200 @@ def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;
// Complex multiply-add B, H, S element size
-def : InstRW<[V2Wr_ZCMABHS, V2Rd_ZCMABHS], (instregex "^CMLA_ZZZ_[BHS]$",
- "^CMLA_ZZZI_[HS]$")>;
+def : InstRW<[V2Wr_ZCMABHS, V2Rd_ZCMABHS], (instregex "^CMLA_ZZZ_[BHS]",
+ "^CMLA_ZZZI_[HS]")>;
// Complex multiply-add D element size
def : InstRW<[V2Wr_ZCMAD, V2Rd_ZCMAD], (instrs CMLA_ZZZ_D)>;
// Conditional extract operations, scalar form
-def : InstRW<[V2Write_8cyc_1M0_1V01], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;
+def : InstRW<[V2Write_8cyc_1M0_1V01], (instregex "^CLAST[AB]_RPZ_[BHSD]")>;
// Conditional extract operations, SIMD&FP scalar and vector forms
-def : InstRW<[V2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
- "^COMPACT_ZPZ_[SD]$",
- "^SPLICE_ZPZZ?_[BHSD]$")>;
+def : InstRW<[V2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]",
+ "^COMPACT_ZPZ_[SD]",
+ "^SPLICE_ZPZZ?_[BHSD]")>;
// Convert to floating point, 64b to float or convert to double
-def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD](_UNDEF)?$",
- "^[SU]CVTF_ZPmZ_StoD(_UNDEF)?$")>;
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
+ "^[SU]CVTF_ZPmZ_StoD")>;
// Convert to floating point, 32b to single or half
-def : InstRW<[V2Write_4cyc_2V02], (instregex "^[SU]CVTF_ZPmZ_Sto[HS](_UNDEF)?$")>;
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;
// Convert to floating point, 16b to half
-def : InstRW<[V2Write_6cyc_4V02], (instregex "^[SU]CVTF_ZPmZ_HtoH(_UNDEF)?$")>;
+def : InstRW<[V2Write_6cyc_4V02], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
// Copy, scalar
-def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]$")>;
+def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]")>;
// Copy, scalar SIMD&FP or imm
-def : InstRW<[V2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]$",
- "^CPY_ZPzI_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]",
+ "^CPY_ZPzI_[BHSD]")>;
// Divides, 32 bit
-def : InstRW<[V2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$",
- "^[SU]DIV_ZPZZ_S_UNDEF$")>;
+def : InstRW<[V2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S",
+ "^[SU]DIV_ZPZZ_S")>;
// Divides, 64 bit
-def : InstRW<[V2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$",
- "^[SU]DIV_ZPZZ_D_UNDEF$")>;
+def : InstRW<[V2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
+ "^[SU]DIV_ZPZZ_D")>;
// Dot product, 8 bit
-def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S$")>;
+def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S")>;
// Dot product, 8 bit, using signed and unsigned integers
def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
// Dot product, 16 bit
-def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D$")>;
+def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D")>;
// Duplicate, immediate and indexed form
-def : InstRW<[V2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]$",
- "^DUP_ZZI_[BHSDQ]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]",
+ "^DUP_ZZI_[BHSDQ]")>;
// Duplicate, scalar form
-def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]$")>;
+def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]")>;
// Extend, sign or zero
-def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]XTB_ZPmZ(_UNDEF)?_[HSD]$",
- "^[SU]XTH_ZPmZ(_UNDEF)?_[SD]$",
- "^[SU]XTW_ZPmZ(_UNDEF)?_[D]$")>;
+def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]XTB_ZPmZ_[HSD]",
+ "^[SU]XTH_ZPmZ_[SD]",
+ "^[SU]XTW_ZPmZ_[D]")>;
// Extract
def : InstRW<[V2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>;
// Extract narrow saturating
-def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]$",
- "^SQXTUN[BT]_ZZ_[BHS]$")>;
+def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]",
+ "^SQXTUN[BT]_ZZ_[BHS]")>;
// Extract/insert operation, SIMD and FP scalar form
-def : InstRW<[V2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$",
- "^INSR_ZV_[BHSD]$")>;
+def : InstRW<[V2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]",
+ "^INSR_ZV_[BHSD]")>;
// Extract/insert operation, scalar
-def : InstRW<[V2Write_6cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]$",
- "^INSR_ZR_[BHSD]$")>;
+def : InstRW<[V2Write_6cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]",
+ "^INSR_ZR_[BHSD]")>;
// Histogram operations
-def : InstRW<[V2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]$",
- "^HISTSEG_ZZZ$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]",
+ "^HISTSEG_ZZZ")>;
// Horizontal operations, B, H, S form, immediate operands only
-def : InstRW<[V2Write_4cyc_1V02], (instregex "^INDEX_II_[BHS]$")>;
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^INDEX_II_[BHS]")>;
// Horizontal operations, B, H, S form, scalar, immediate operands/ scalar
// operands only / immediate, scalar operands
-def : InstRW<[V2Write_7cyc_1M0_1V02], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;
+def : InstRW<[V2Write_7cyc_1M0_1V02], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>;
// Horizontal operations, D form, immediate operands only
def : InstRW<[V2Write_5cyc_2V02], (instrs INDEX_II_D)>;
// Horizontal operations, D form, scalar, immediate operands)/ scalar operands
// only / immediate, scalar operands
-def : InstRW<[V2Write_8cyc_2M0_2V02], (instregex "^INDEX_(IR|RI|RR)_D$")>;
+def : InstRW<[V2Write_8cyc_2M0_2V02], (instregex "^INDEX_(IR|RI|RR)_D")>;
// Logical
def : InstRW<[V2Write_2cyc_1V],
- (instregex "^(AND|EOR|ORR)_ZI$",
- "^(AND|BIC|EOR|ORR)_ZZZ$",
- "^EOR(BT|TB)_ZZZ_[BHSD]$",
- "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$",
- "^NOT_ZPmZ_[BHSD]_UNDEF$")>;
+ (instregex "^(AND|EOR|ORR)_ZI",
+ "^(AND|BIC|EOR|ORR)_ZZZ",
+ "^EOR(BT|TB)_ZZZ_[BHSD]",
+ "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]",
+ "^NOT_ZPmZ_[BHSD]")>;
// Max/min, basic and pairwise
-def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]$",
- "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]$",
- "^[SU](MAX|MIN)_ZPZZ_[BHSD]_UNDEF$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
+ "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]",
+ "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>;
// Matching operations
// FIXME: SOG p. 44, n. 5: If the consuming instruction has a flag source, the
// latency for this instruction is 4 cycles.
-def : InstRW<[V2Write_2or3cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]$")>;
+def : InstRW<[V2Write_2or3cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]")>;
// Matrix multiply-accumulate
def : InstRW<[V2Wr_ZMMA, V2Rd_ZMMA], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
// Move prefix
-def : InstRW<[V2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
- "^MOVPRFX_ZZ$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]",
+ "^MOVPRFX_ZZ")>;
// Multiply, B, H, S element size
-def : InstRW<[V2Write_4cyc_1V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]$",
- "^MUL_ZPZZ_[BHS]_UNDEF$",
- "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$",
- "^[SU]MULH_ZPZZ_[BHS]_UNDEF$")>;
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
+ "^MUL_ZPZZ_[BHS]",
+ "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
+ "^[SU]MULH_ZPZZ_[BHS]")>;
// Multiply, D element size
-def : InstRW<[V2Write_5cyc_2V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D$",
- "^MUL_ZPZZ_D_UNDEF$",
- "^[SU]MULH_(ZPmZ|ZZZ)_D$",
- "^[SU]MULH_ZPZZ_D_UNDEF$")>;
+def : InstRW<[V2Write_5cyc_2V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
+ "^MUL_ZPZZ_D",
+ "^[SU]MULH_(ZPmZ|ZZZ)_D",
+ "^[SU]MULH_ZPZZ_D")>;
// Multiply long
-def : InstRW<[V2Write_4cyc_1V02], (instregex "^[SU]MULL[BT]_ZZZI_[SD]$",
- "^[SU]MULL[BT]_ZZZ_[HSD]$")>;
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^[SU]MULL[BT]_ZZZI_[SD]",
+ "^[SU]MULL[BT]_ZZZ_[HSD]")>;
// Multiply accumulate, B, H, S element size
def : InstRW<[V2Wr_ZMABHS, V2Rd_ZMABHS],
- (instregex "^ML[AS]_ZZZI_[HS]$", "^ML[AS]_ZPZZZ_[BHS]_UNDEF$")>;
+ (instregex "^ML[AS]_ZZZI_[HS]", "^ML[AS]_ZPZZZ_[BHS]")>;
def : InstRW<[V2Wr_ZMABHS, ReadDefault, V2Rd_ZMABHS],
- (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]$")>;
+ (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;
// Multiply accumulate, D element size
def : InstRW<[V2Wr_ZMAD, V2Rd_ZMAD],
- (instregex "^ML[AS]_ZZZI_D$", "^ML[AS]_ZPZZZ_D_UNDEF$")>;
+ (instregex "^ML[AS]_ZZZI_D", "^ML[AS]_ZPZZZ_D")>;
def : InstRW<[V2Wr_ZMAD, ReadDefault, V2Rd_ZMAD],
- (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D$")>;
+ (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>;
// Multiply accumulate long
-def : InstRW<[V2Wr_ZMAL, V2Rd_ZMAL], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$",
- "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>;
+def : InstRW<[V2Wr_ZMAL, V2Rd_ZMAL], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]",
+ "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>;
// Multiply accumulate saturating doubling long regular
def : InstRW<[V2Wr_ZMASQL, V2Rd_ZMASQ],
- (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]$",
- "^SQDML[AS]L[BT]_ZZZI_[SD]$")>;
+ (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]",
+ "^SQDML[AS]L[BT]_ZZZI_[SD]")>;
// Multiply saturating doubling high, B, H, S element size
-def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULH_ZZZ_[BHS]$",
- "^SQDMULH_ZZZI_[HS]$")>;
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULH_ZZZ_[BHS]",
+ "^SQDMULH_ZZZI_[HS]")>;
// Multiply saturating doubling high, D element size
def : InstRW<[V2Write_5cyc_2V02], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;
// Multiply saturating doubling long
-def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULL[BT]_ZZZ_[HSD]$",
- "^SQDMULL[BT]_ZZZI_[SD]$")>;
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULL[BT]_ZZZ_[HSD]",
+ "^SQDMULL[BT]_ZZZI_[SD]")>;
// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
// element size
-def : InstRW<[V2Wr_ZMASQBHS, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]$",
- "^SQRDCMLAH_ZZZ_[BHS]$",
- "^SQRDML[AS]H_ZZZI_[HS]$",
- "^SQRDCMLAH_ZZZI_[HS]$")>;
+def : InstRW<[V2Wr_ZMASQBHS, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]",
+ "^SQRDCMLAH_ZZZ_[BHS]",
+ "^SQRDML[AS]H_ZZZI_[HS]",
+ "^SQRDCMLAH_ZZZI_[HS]")>;
// Multiply saturating rounding doubling regular/complex accumulate, D element
// size
-def : InstRW<[V2Wr_ZMASQD, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZI?_D$",
- "^SQRDCMLAH_ZZZ_D$")>;
+def : InstRW<[V2Wr_ZMASQD, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZI?_D",
+ "^SQRDCMLAH_ZZZ_D")>;
// Multiply saturating rounding doubling regular/complex, B, H, S element size
-def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQRDMULH_ZZZ_[BHS]$",
- "^SQRDMULH_ZZZI_[HS]$")>;
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQRDMULH_ZZZ_[BHS]",
+ "^SQRDMULH_ZZZI_[HS]")>;
// Multiply saturating rounding doubling regular/complex, D element size
-def : InstRW<[V2Write_5cyc_2V02], (instregex "^SQRDMULH_ZZZI?_D$")>;
+def : InstRW<[V2Write_5cyc_2V02], (instregex "^SQRDMULH_ZZZI?_D")>;
// Multiply/multiply long, (8x8) polynomial
-def : InstRW<[V2Write_2cyc_1V23], (instregex "^PMUL_ZZZ_B$",
- "^PMULL[BT]_ZZZ_[HDQ]$")>;
+def : InstRW<[V2Write_2cyc_1V23], (instregex "^PMUL_ZZZ_B",
+ "^PMULL[BT]_ZZZ_[HDQ]")>;
// Predicate counting vector
-def : InstRW<[V2Write_2cyc_1V], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI")>;
// Reciprocal estimate
-def : InstRW<[V2Write_4cyc_2V02], (instrs URECPE_ZPmZ_S, URSQRTE_ZPmZ_S,
- URECPE_ZPmZ_S_UNDEF, URSQRTE_ZPmZ_S_UNDEF)>;
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>;
// Reduction, arithmetic, B form
def : InstRW<[V2Write_9cyc_2V_4V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
@@ -2383,47 +2380,47 @@ def : InstRW<[V2Write_6cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
def : InstRW<[V2Write_4cyc_2V], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
// Reduction, logical
-def : InstRW<[V2Write_6cyc_1V_1V13], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]$")>;
+def : InstRW<[V2Write_6cyc_1V_1V13], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]")>;
// Reverse, vector
-def : InstRW<[V2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]$",
- "^REVB_ZPmZ_[HSD]$",
- "^REVH_ZPmZ_[SD]$",
- "^REVW_ZPmZ_D$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]",
+ "^REVB_ZPmZ_[HSD]",
+ "^REVH_ZPmZ_[SD]",
+ "^REVW_ZPmZ_D")>;
// Select, vector form
-def : InstRW<[V2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]")>;
// Table lookup
-def : InstRW<[V2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]")>;
// Table lookup extension
-def : InstRW<[V2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]")>;
// Transpose, vector form
-def : InstRW<[V2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>;
// Unpack and extend
-def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>;
// Zip/unzip
-def : InstRW<[V2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>;
// SVE floating-point instructions
// -----------------------------------------------------------------------------
// Floating point absolute value/difference
-def : InstRW<[V2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]$",
- "^FABD_ZPZZ_[HSD]_UNDEF$",
- "^FABS_ZPmZ_[HSD]_UNDEF$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]",
+ "^FABD_ZPZZ_[HSD]",
+ "^FABS_ZPmZ_[HSD]")>;
// Floating point arithmetic
-def : InstRW<[V2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$",
- "^F(ADD|SUB)_ZPZ[IZ]_[HSD]_UNDEF$",
- "^FADDP_ZPmZZ_[HSD]$",
- "^FNEG_ZPmZ(_UNDEF)?_[HSD]$",
- "^FSUBR_ZPm[IZ]_[HSD]$",
- "^FSUBR_ZPZI_[HSD]_UNDEF$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
+ "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
+ "^FADDP_ZPmZZ_[HSD]",
+ "^FNEG_ZPmZ_[HSD]",
+ "^FSUBR_ZPm[IZ]_[HSD]",
+ "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;
// Floating point associative add, F16
def : InstRW<[V2Write_10cyc_1V1_9rc], (instrs FADDA_VPZ_H)>;
@@ -2435,144 +2432,138 @@ def : InstRW<[V2Write_6cyc_1V1_5rc], (instrs FADDA_VPZ_S)>;
def : InstRW<[V2Write_4cyc_1V], (instrs FADDA_VPZ_D)>;
// Floating point compare
-def : InstRW<[V2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]$",
- "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$",
- "^FCM(LE|LT)_PPzZ0_[HSD]$",
- "^FCMUO_PPzZZ_[HSD]$")>;
+def : InstRW<[V2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]",
+ "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]",
+ "^FCM(LE|LT)_PPzZ0_[HSD]",
+ "^FCMUO_PPzZZ_[HSD]")>;
// Floating point complex add
-def : InstRW<[V2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]$")>;
+def : InstRW<[V2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]")>;
// Floating point complex multiply add
-def : InstRW<[V2Wr_ZFCMA, ReadDefault, V2Rd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]$")>;
-def : InstRW<[V2Wr_ZFCMA, V2Rd_ZFCMA], (instregex "^FCMLA_ZZZI_[HS]$")>;
+def : InstRW<[V2Wr_ZFCMA, ReadDefault, V2Rd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]")>;
+def : InstRW<[V2Wr_ZFCMA, V2Rd_ZFCMA], (instregex "^FCMLA_ZZZI_[HS]")>;
// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
-def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT_ZPmZ_(HtoS|StoH)(_UNDEF)?$",
- "^FCVTLT_ZPmZ_HtoS$",
- "^FCVTNT_ZPmZ_StoH$")>;
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
+ "^FCVTLT_ZPmZ_HtoS",
+ "^FCVTNT_ZPmZ_StoH")>;
// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
// or F64 to F16)
-def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)(_UNDEF)?$",
- "^FCVTLT_ZPmZ_StoD$",
- "^FCVTNT_ZPmZ_DtoS$")>;
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
+ "^FCVTLT_ZPmZ_StoD",
+ "^FCVTNT_ZPmZ_DtoS")>;
// Floating point convert, round to odd
def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;
// Floating point base2 log, F16
-def : InstRW<[V2Write_6cyc_4V02], (instrs FLOGB_ZPmZ_H)>;
+def : InstRW<[V2Write_6cyc_4V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>;
// Floating point base2 log, F32
-def : InstRW<[V2Write_4cyc_2V02], (instrs FLOGB_ZPmZ_S)>;
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>;
// Floating point base2 log, F64
-def : InstRW<[V2Write_3cyc_1V02], (instrs FLOGB_ZPmZ_D)>;
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>;
// Floating point convert to integer, F16
-def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVTZ[SU]_ZPmZ_HtoH(_UNDEF)?$")>;
+def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;
// Floating point convert to integer, F32
-def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)(_UNDEF)?$")>;
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;
// Floating point convert to integer, F64
def : InstRW<[V2Write_3cyc_1V02],
- (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)(_UNDEF)?$")>;
+ (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;
// Floating point copy
-def : InstRW<[V2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]$",
- "^FDUP_ZI_[HSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]",
+ "^FDUP_ZI_[HSD]")>;
// Floating point divide, F16
-def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FDIVR?_ZPmZ_H$",
- "^FDIV_ZPZZ_H_UNDEF$")>;
+def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
// Floating point divide, F32
-def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FDIVR?_ZPmZ_S$",
- "^FDIV_ZPZZ_S_UNDEF$")>;
+def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
// Floating point divide, F64
-def : InstRW<[V2Write_15cyc_1V02_14rc], (instregex "^FDIVR?_ZPmZ_D$",
- "^FDIV_ZPZZ_D_UNDEF$")>;
+def : InstRW<[V2Write_15cyc_1V02_14rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
// Floating point min/max pairwise
-def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;
// Floating point min/max
-def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$",
- "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]_UNDEF$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
+ "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;
// Floating point multiply
-def : InstRW<[V2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]$",
- "^FMULX_ZPZZ_[HSD]_UNDEF$",
- "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$",
- "^FMUL_ZPZ[IZ]_[HSD]_UNDEF$")>;
+def : InstRW<[V2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
+ "^FMULX_ZPZZ_[HSD]",
+ "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
+ "^FMUL_ZPZ[IZ]_[HSD]")>;
// Floating point multiply accumulate
def : InstRW<[V2Wr_ZFMA, ReadDefault, V2Rd_ZFMA],
- (instregex "^FN?ML[AS]_ZPmZZ_[HSD]$",
- "^FN?(MAD|MSB)_ZPmZZ_[HSD]$")>;
+ (instregex "^FN?ML[AS]_ZPmZZ_[HSD]",
+ "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>;
def : InstRW<[V2Wr_ZFMA, V2Rd_ZFMA],
- (instregex "^FML[AS]_ZZZI_[HSD]$",
- "^FN?ML[AS]_ZPZZZ_[HSD]_UNDEF$")>;
+ (instregex "^FML[AS]_ZZZI_[HSD]",
+ "^FN?ML[AS]_ZPZZZ_[HSD]")>;
// Floating point multiply add/sub accumulate long
-def : InstRW<[V2Wr_ZFMAL, V2Rd_ZFMAL], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>;
+def : InstRW<[V2Wr_ZFMAL, V2Rd_ZFMAL], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>;
// Floating point reciprocal estimate, F16
-def : InstRW<[V2Write_6cyc_4V02], (instrs FRECPE_ZZ_H, FRECPX_ZPmZ_H,
- FRSQRTE_ZZ_H, FRECPX_ZPmZ_H_UNDEF)>;
+def : InstRW<[V2Write_6cyc_4V02], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>;
// Floating point reciprocal estimate, F32
-def : InstRW<[V2Write_4cyc_2V02], (instrs FRECPE_ZZ_S, FRECPX_ZPmZ_S,
- FRSQRTE_ZZ_S, FRECPX_ZPmZ_S_UNDEF)>;
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>;
// Floating point reciprocal estimate, F64
-def : InstRW<[V2Write_3cyc_1V02], (instrs FRECPE_ZZ_D, FRECPX_ZPmZ_D,
- FRSQRTE_ZZ_D, FRECPX_ZPmZ_D_UNDEF)>;
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>;
// Floating point reciprocal step
-def : InstRW<[V2Write_4cyc_1V], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
+def : InstRW<[V2Write_4cyc_1V], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;
// Floating point reduction, F16
def : InstRW<[V2Write_8cyc_4V],
- (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H$")>;
+ (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H")>;
// Floating point reduction, F32
def : InstRW<[V2Write_6cyc_3V],
- (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S$")>;
+ (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S")>;
// Floating point reduction, F64
def : InstRW<[V2Write_4cyc_2V],
- (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D$")>;
+ (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D")>;
// Floating point round to integral, F16
-def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ(_UNDEF)?_H$")>;
+def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
// Floating point round to integral, F32
-def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ(_UNDEF)?_S$")>;
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
// Floating point round to integral, F64
-def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ(_UNDEF)?_D$")>;
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
// Floating point square root, F16
-def : InstRW<[V2Write_13cyc_1V0_12rc], (instrs FSQRT_ZPmZ_H, FSQRT_ZPmZ_H_UNDEF)>;
+def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H")>;
// Floating point square root, F32
-def : InstRW<[V2Write_10cyc_1V0_9rc], (instrs FSQRT_ZPmZ_S, FSQRT_ZPmZ_S_UNDEF)>;
+def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S")>;
// Floating point square root, F64
-def : InstRW<[V2Write_16cyc_1V0_14rc], (instrs FSQRT_ZPmZ_D, FSQRT_ZPmZ_D_UNDEF)>;
+def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D")>;
// Floating point trigonometric exponentiation
-def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]$")>;
+def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>;
// Floating point trigonometric multiply add
-def : InstRW<[V2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]$")>;
+def : InstRW<[V2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]")>;
// Floating point trigonometric, miscellaneous
-def : InstRW<[V2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
+def : InstRW<[V2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]")>;
// SVE BFloat16 (BF16) instructions
// -----------------------------------------------------------------------------
@@ -2587,7 +2578,7 @@ def : InstRW<[V2Wr_ZBFDOT, V2Rd_ZBFDOT], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
def : InstRW<[V2Wr_ZBFMMA, V2Rd_ZBFMMA], (instrs BFMMLA_ZZZ)>;
// Multiply accumulate long
-def : InstRW<[V2Wr_ZBFMAL, V2Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?$")>;
+def : InstRW<[V2Wr_ZBFMAL, V2Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?")>;
// SVE Load instructions
// -----------------------------------------------------------------------------
diff --git a/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp b/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp
new file mode 100644
index 00000000000000..2c4f7d04af5bf8
--- /dev/null
+++ b/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp
@@ -0,0 +1,90 @@
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+namespace {
+// Creates a target machine for the "aarch64--" triple and the given CPU name,
+// with an empty feature string and default target options.
+std::unique_ptr<LLVMTargetMachine> createTargetMachine(const std::string &CPU) {
+  // Register the AArch64 target components before querying the registry.
+  LLVMInitializeAArch64TargetInfo();
+  LLVMInitializeAArch64Target();
+  LLVMInitializeAArch64TargetMC();
+
+  const auto TripleName = Triple::normalize("aarch64--");
+  std::string LookupError;
+  const Target *Found = TargetRegistry::lookupTarget(TripleName, LookupError);
+
+  // NOTE(review): Found is dereferenced without a null check and LookupError
+  // is never inspected -- presumably the lookup cannot fail here; confirm.
+  TargetMachine *Machine = Found->createTargetMachine(
+      TripleName, CPU, "", TargetOptions(), std::nullopt, std::nullopt,
+      CodeGenOpt::Default);
+  return std::unique_ptr<LLVMTargetMachine>(
+      static_cast<LLVMTargetMachine *>(Machine));
+}
+
+// Builds an AArch64InstrInfo from a subtarget constructed out of TM's triple,
+// CPU name and feature string.
+// NOTE(review): the subtarget is a local that is destroyed on return; confirm
+// that AArch64InstrInfo does not keep a reference to it.
+std::unique_ptr<AArch64InstrInfo> createInstrInfo(TargetMachine *TM) {
+  // The target CPU name is supplied for both the second and third arguments.
+  const std::string CPUName(TM->getTargetCPU());
+  const std::string FeatureStr(TM->getTargetFeatureString());
+  AArch64Subtarget ST(TM->getTargetTriple(), CPUName, CPUName, FeatureStr, *TM,
+                      true);
+  return std::make_unique<AArch64InstrInfo>(ST);
+}
+
+// For the scheduling model of CPU, checks that every opcode present in the SVE
+// pseudo map has the same maximum write latency as the instruction it maps to,
+// and that the pseudo's scheduling class descriptor is valid.
+void runSVEPseudoTestForCPU(const std::string &CPU) {
+
+  std::unique_ptr<LLVMTargetMachine> TM = createTargetMachine(CPU);
+  ASSERT_TRUE(TM);
+  std::unique_ptr<AArch64InstrInfo> II = createInstrInfo(TM.get());
+  ASSERT_TRUE(II);
+
+  const MCSubtargetInfo *STI = TM->getMCSubtargetInfo();
+  MCSchedModel SchedModel = STI->getSchedModel();
+
+  for (unsigned i = 0; i < AArch64::INSTRUCTION_LIST_END; ++i) {
+    // Check if instruction is in the pseudo table
+    // i holds the opcode of the pseudo, OrigInstr holds the opcode of the
+    // original instruction
+    int OrigInstr = AArch64::getSVEPseudoMap(i);
+    if (OrigInstr == -1)
+      continue;
+
+    const MCInstrDesc &Desc = II->get(i);
+    unsigned SCClass = Desc.getSchedClass();
+    const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SCClass);
+
+    const MCInstrDesc &DescOrig = II->get(OrigInstr);
+    unsigned SCClassOrig = DescOrig.getSchedClass();
+    const MCSchedClassDesc *SCDescOrig =
+        SchedModel.getSchedClassDesc(SCClassOrig);
+
+    // Validate the pseudo's descriptor before reading its latency entries.
+    ASSERT_TRUE(SCDesc->isValid());
+
+    int Latency = 0;
+    int LatencyOrig = 0;
+
+    // NOTE(review): DefIdx is bounded by the pseudo's entry count only and is
+    // also used to index the original's entries -- presumably both classes
+    // have the same number of write latency entries; confirm.
+    for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
+         DefIdx != DefEnd; ++DefIdx) {
+      const MCWriteLatencyEntry *WLEntry =
+          STI->getWriteLatencyEntry(SCDesc, DefIdx);
+      const MCWriteLatencyEntry *WLEntryOrig =
+          STI->getWriteLatencyEntry(SCDescOrig, DefIdx);
+      Latency = std::max(Latency, static_cast<int>(WLEntry->Cycles));
+      // Accumulate the original's maximum from its own running value; seeding
+      // it with Latency would hide a pseudo that is slower than its original.
+      LatencyOrig =
+          std::max(LatencyOrig, static_cast<int>(WLEntryOrig->Cycles));
+    }
+
+    ASSERT_EQ(Latency, LatencyOrig);
+  }
+}
+
+// One test per CPU name passed to runSVEPseudoTestForCPU. TODO: add more CPUs that support SVE/SVE2.
+TEST(AArch64SVESchedPseudoTesta510, IsCorrect) {
+ runSVEPseudoTestForCPU("cortex-a510");
+}
+
+TEST(AArch64SVESchedPseudoTestv2, IsCorrect) {
+ runSVEPseudoTestForCPU("neoverse-v2");
+}
+
+} // namespace
diff --git a/llvm/unittests/Target/AArch64/CMakeLists.txt b/llvm/unittests/Target/AArch64/CMakeLists.txt
index 988d13191f4a0a..dacd919ba1e33b 100644
--- a/llvm/unittests/Target/AArch64/CMakeLists.txt
+++ b/llvm/unittests/Target/AArch64/CMakeLists.txt
@@ -28,6 +28,7 @@ add_llvm_target_unittest(AArch64Tests
InstSizes.cpp
MatrixRegisterAliasing.cpp
SMEAttributesTest.cpp
+ AArch64SVESchedPseudoTest.cpp
)
set_property(TARGET AArch64Tests PROPERTY FOLDER "Tests/UnitTests/TargetTests")
More information about the llvm-commits
mailing list