[llvm] d9d9be6 - [AArch64] Update SVE scheduling of some CPUs

Harvin Iriawan via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 4 02:42:06 PDT 2023


Author: Harvin Iriawan
Date: 2023-07-04T10:41:56+01:00
New Revision: d9d9be63a52dc6e908dba8f87d44192ee47ac5f8

URL: https://github.com/llvm/llvm-project/commit/d9d9be63a52dc6e908dba8f87d44192ee47ac5f8
DIFF: https://github.com/llvm/llvm-project/commit/d9d9be63a52dc6e908dba8f87d44192ee47ac5f8.diff

LOG: [AArch64] Update SVE scheduling of some CPUs

  * Update cortex-a510 and neoverse-v2 SVE scheduling so that pseudos
have the same instruction latency as original instruction.

  Differential Revision: https://reviews.llvm.org/D154084

Added: 
    llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp

Modified: 
    llvm/lib/Target/AArch64/AArch64SchedA510.td
    llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
    llvm/unittests/Target/AArch64/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64SchedA510.td b/llvm/lib/Target/AArch64/AArch64SchedA510.td
index 85e73b2fff6bc2..2526fe3041909e 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA510.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA510.td
@@ -554,196 +554,200 @@ def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs BRKPAS_PPzPP, BR
 
 // Loop control, based on GPR
 def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
-             (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;
+             (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>;
 
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>;
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>;
 
 // Loop terminate
-def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;
+def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instregex "^CTERM(EQ|NE)_(WW|XX)")>;
 
 // Predicate counting scalar
 def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
 
 def : InstRW<[CortexA510Write<1, CortexA510UnitALU>],
-             (instregex "^CNT[BHWD]_XPiI$")>;
+             (instregex "^CNT[BHWD]_XPiI")>;
 
 def : InstRW<[CortexA510Write<1, CortexA510UnitALU>],
-             (instregex "^(INC|DEC)[BHWD]_XPiI$")>;
+             (instregex "^(INC|DEC)[BHWD]_XPiI")>;
 
 def : InstRW<[CortexA510Write<1, CortexA510UnitALU>],
-             (instregex "^(SQINC|SQDEC|UQINC|UQDEC)[BHWD]_[XW]Pi(Wd)?I$")>;
+             (instregex "^(SQINC|SQDEC|UQINC|UQDEC)[BHWD]_[XW]Pi(Wd)?I")>;
 
 // Predicate counting scalar, active predicate
 def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
-             (instregex "^CNTP_XPP_[BHSD]$")>;
+             (instregex "^CNTP_XPP_[BHSD]")>;
 
 def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
-             (instregex "^(DEC|INC)P_XP_[BHSD]$")>;
+             (instregex "^(DEC|INC)P_XP_[BHSD]")>;
 
 def : InstRW<[CortexA510Write<8, CortexA510UnitVALU0>],
-             (instregex "^(SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$",
-                        "^(UQDEC|UQINC)P_WP_[BHSD]$",
-                        "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]$")>;
+             (instregex "^(SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]",
+                        "^(UQDEC|UQINC)P_WP_[BHSD]",
+                        "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]")>;
 
 
 // Predicate counting vector, active predicate
 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
-             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>;
+             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>;
 
 // Predicate logical
 def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
-             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;
+             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>;
 
 // Predicate logical, flag setting
 def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
-             (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>;
+             (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>;
 
 // Predicate reverse
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^REV_PP_[BHSD]$")>;
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^REV_PP_[BHSD]")>;
 
 // Predicate select
 def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs SEL_PPPP)>;
 
 // Predicate set
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PFALSE$", "^PTRUE_[BHSD]$")>;
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PFALSE", "^PTRUE_[BHSD]")>;
 
 // Predicate set/initialize, set flags
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PTRUES_[BHSD]$")>;
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PTRUES_[BHSD]")>;
 
 // Predicate find first/next
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>;
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>;
 
 // Predicate test
 def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs PTEST_PP)>;
 
 // Predicate transpose
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^TRN[12]_PPP_[BHSDQ]$")>;
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^TRN[12]_PPP_[BHSDQ]")>;
 
 // Predicate unpack and widen
 def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs PUNPKHI_PP, PUNPKLO_PP)>;
 
 // Predicate zip/unzip
-def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>;
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]")>;
 
 
 // SVE integer instructions
 // -----------------------------------------------------------------------------
 // Arithmetic, absolute diff
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]ABD_ZPmZ_[BHSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]ABD_(ZPmZ|ZPZZ)_[BHSD]")>;
 
 // Arithmetic, absolute diff accum
-def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>;
+def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^[SU]ABA_ZZZ_[BHSD]")>;
 
 // Arithmetic, absolute diff accum long
-def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>;
+def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>;
 
 // Arithmetic, absolute diff long
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>;
 
 // Arithmetic, basic
 def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>],
-             (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]$",
-                        "^(ADD|SUB|SUBR)_ZPmZ_[BHSD]$",
-                        "^(ADD|SUB)_ZZZ_[BHSD]$",
-                        "^(ADD|SUB|SUBR)_ZI_[BHSD]$",
-                        "^ADR_[SU]XTW_ZZZ_D_[0123]$",
-                        "^ADR_LSL_ZZZ_[SD]_[0123]$",
-                        "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]$",
-                        "^SADDLBT_ZZZ_[HSD]$",
-                        "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]$",
-                        "^SSUBL(BT|TB)_ZZZ_[HSD]$")>;
+             (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]",
+                        "^(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
+                        "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]",
+                        "^(ADD|SUB)_ZZZ_[BHSD]",
+                        "^(ADD|SUB|SUBR)_ZI_[BHSD]",
+                        "^ADR_[SU]XTW_ZZZ_D_[0123]",
+                        "^ADR_LSL_ZZZ_[SD]_[0123]",
+                        "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
+                        "^SADDLBT_ZZZ_[HSD]",
+                        "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
+                        "^SSUBL(BT|TB)_ZZZ_[HSD]")>;
 
 // Arithmetic, complex
 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
-             (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]$",
-                        "^SQ(ABS|NEG)_ZPmZ_[BHSD]$",
-                        "^SQ(ADD|SUB|SUBR)_ZPmZ_?[BHSD]$",
-                        "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]$",
-                        "^[SU]Q(ADD|SUB)_ZI_[BHSD]$",
-                        "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]$",
-                        "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]$")>;
+             (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
+                        "^SQ(ABS|NEG)_ZPmZ_[BHSD]",
+                        "^SQ(ADD|SUB|SUBR)_ZPmZ_?[BHSD]",
+                        "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
+                        "^[SU]Q(ADD|SUB)_ZI_[BHSD]",
+                        "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
+                        "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;
 
 // Arithmetic, large integer
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>;
 
 // Arithmetic, pairwise add
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^ADDP_ZPmZ_[BHSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^ADDP_ZPmZ_[BHSD]")>;
 
 // Arithmetic, pairwise add and accum long
-def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>;
+def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "^[SU]ADALP_ZPmZ_[HSD]")>;
 
 // Arithmetic, shift
 def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>],
-             (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]$",
-                        "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]$",
-                        "^(ASR|LSL|LSR)_ZPmI_[BHSD]$",
-                        "^(ASR|LSL|LSR)_ZPmZ_[BHSD]$",
-                        "^(ASR|LSL|LSR)_ZZI_[BHSD]$",
-                        "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]$")>;
+             (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]",
+                        "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]",
+                        "^(ASR|LSL|LSR)_ZPmI_[BHSD]",
+                        "^(ASR|LSL|LSR)_ZPZI_[BHSD]",
+                        "^(ASR|LSL|LSR)_ZPmZ_[BHSD]",
+                        "^(ASR|LSL|LSR)_ZPZZ_[BHSD]",
+                        "^(ASR|LSL|LSR)_ZZI_[BHSD]",
+                        "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
+// Arithmetic, shift right for divide
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
+             (instregex "^ASRD_ZPmI_[BHSD]",
+                        "^ASRD_ZPZI_[BHSD]")>;
 
 // Arithmetic, shift and accumulate
 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
-             (instregex "^(SSRA|USRA)_ZZI_[BHSD]$")>;
+             (instregex "^(SSRA|USRA)_ZZI_[BHSD]")>;
 
 def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>],
-             (instregex "^(SRSRA|URSRA)_ZZI_[BHSD]$")>;
+             (instregex "^(SRSRA|URSRA)_ZZI_[BHSD]")>;
 
 
 // Arithmetic, shift by immediate
 // Arithmetic, shift by immediate and insert
 def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>],
-             (instregex "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]$")>;
+             (instregex "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]")>;
 
 // Arithmetic, shift complex
 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
-             (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]$",
-                        "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]$",
-                        "^(SQSHL|SQSHLU|UQSHL)_ZPmI_[BHSD]$",
-                        "^SQSHRU?N[BT]_ZZI_[BHS]$",
-                        "^UQR?SHRN[BT]_ZZI_[BHS]$")>;
-
-// Arithmetic, shift right for divide
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^ASRD_ZPmI_[BHSD]$")>;
+             (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]",
+                        "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_(ZPmZ|ZPZZ)_[BHSD]",
+                        "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]",
+                        "^SQSHRU?N[BT]_ZZI_[BHS]",
+                        "^UQR?SHRN[BT]_ZZI_[BHS]")>;
 
 // Arithmetic, shift rounding
 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
-             (instregex "^(SRSHL|SRSHLR|URSHL|URSHLR)_ZPmZ_[BHSD]$",
-                        "^[SU]RSHR_ZPmI_[BHSD]$")>;
+             (instregex "^(SRSHL|SRSHR|SRSHLR|URSHL|URSHLR|URSHR)_(ZPmZ|ZPZZ|ZPZI)_[BHSD]",
+                        "^[SU]RSHR_ZPmI_[BHSD]")>;
 
 // Bit manipulation
 def : InstRW<[CortexA510MCWrite<14, 13, CortexA510UnitVMC>],
-             (instregex "^(BDEP|BEXT|BGRP)_ZZZ_B$")>;
+             (instregex "^(BDEP|BEXT|BGRP)_ZZZ_B")>;
 
 def : InstRW<[CortexA510MCWrite<22, 21, CortexA510UnitVMC>],
-             (instregex "^(BDEP|BEXT|BGRP)_ZZZ_H$")>;
+             (instregex "^(BDEP|BEXT|BGRP)_ZZZ_H")>;
 
 def : InstRW<[CortexA510MCWrite<38, 37, CortexA510UnitVMC>],
-             (instregex "^(BDEP|BEXT|BGRP)_ZZZ_S$")>;
+             (instregex "^(BDEP|BEXT|BGRP)_ZZZ_S")>;
 
 def : InstRW<[CortexA510MCWrite<70, 69, CortexA510UnitVMC>],
-             (instregex "^(BDEP|BEXT|BGRP)_ZZZ_D$")>;
+             (instregex "^(BDEP|BEXT|BGRP)_ZZZ_D")>;
 
 
 // Bitwise select
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>;
 
 // Count/reverse bits
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^(CLS|CLZ|RBIT)_ZPmZ_[BHSD]$")>;
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_[BH]$")>;
-def : InstRW<[CortexA510Write<8, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_S$")>;
-def : InstRW<[CortexA510Write<12, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_D$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^(CLS|CLZ|RBIT)_ZPmZ_[BHSD]")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_[BH]")>;
+def : InstRW<[CortexA510Write<8, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_S")>;
+def : InstRW<[CortexA510Write<12, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_D")>;
 // Broadcast logical bitmask immediate to vector
 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instrs DUPM_ZI)>;
 
 // Compare and set flags
 def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>],
-             (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
-                        "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;
+             (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]",
+                        "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>;
 
 // Complex add
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^CADD_ZZI_[BHSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^CADD_ZZI_[BHSD]")>;
 
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^SQCADD_ZZI_[BHSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^SQCADD_ZZI_[BHSD]")>;
 
 // Complex dot product 8-bit element
 def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
@@ -752,19 +756,19 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs CDOT_ZZZ_S, CDOT_
 def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;
 
 // Complex multiply-add B, H, S element size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^CMLA_ZZZ_[BHS]$",
-                                            "^CMLA_ZZZI_[HS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^CMLA_ZZZ_[BHS]",
+                                            "^CMLA_ZZZI_[HS]")>;
 
 // Complex multiply-add D element size
 def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs CMLA_ZZZ_D)>;
 
 // Conditional extract operations, scalar form
-def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;
+def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^CLAST[AB]_RPZ_[BHSD]")>;
 
 // Conditional extract operations, SIMD&FP scalar and vector forms
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
-                                            "^COMPACT_ZPZ_[SD]$",
-                                            "^SPLICE_ZPZZ?_[BHSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]",
+                                            "^COMPACT_ZPZ_[SD]",
+                                            "^SPLICE_ZPZZ?_[BHSD]")>;
 
 // Convert to floating point, 64b to float or convert to double
 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]CVTF_ZPmZ_Dto[SD]")>;
@@ -782,165 +786,166 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]CVTF_ZPm
 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
 
 // Copy, scalar
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU0>],(instregex "^CPY_ZPmR_[BHSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU0>],(instregex "^CPY_ZPmR_[BHSD]")>;
 
 // Copy, scalar SIMD&FP or imm
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^CPY_ZPm[IV]_[BHSD]$",
-                                           "^CPY_ZPzI_[BHSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^CPY_ZPm[IV]_[BHSD]",
+                                           "^CPY_ZPzI_[BHSD]")>;
 
 // Divides, 32 bit
-def : InstRW<[CortexA510MCWrite<15, 12, CortexA510UnitVMC>], (instregex "^[SU]DIVR?_ZPmZ_S$")>;
+def : InstRW<[CortexA510MCWrite<15, 12, CortexA510UnitVMC>], (instregex "^[SU]DIVR?_(ZPmZ|ZPZZ)_S")>;
 
 // Divides, 64 bit
-def : InstRW<[CortexA510MCWrite<26, 23, CortexA510UnitVMC>], (instregex "^[SU]DIVR?_ZPmZ_D$")>;
+def : InstRW<[CortexA510MCWrite<26, 23, CortexA510UnitVMC>], (instregex "^[SU]DIVR?_(ZPmZ|ZPZZ)_D")>;
 
 // Dot product, 8 bit
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_S$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_S")>;
 
 // Dot product, 8 bit, using signed and unsigned integers
 def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
 
 // Dot product, 16 bit
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_D$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_D")>;
 
 // Duplicate, immediate and indexed form
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^DUP_ZI_[BHSD]$",
-                                           "^DUP_ZZI_[BHSDQ]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^DUP_ZI_[BHSD]",
+                                           "^DUP_ZZI_[BHSDQ]")>;
 
 // Duplicate, scalar form
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^DUP_ZR_[BHSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^DUP_ZR_[BHSD]")>;
 
 // Extend, sign or zero
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]XTB_ZPmZ_[HSD]$",
-                                            "^[SU]XTH_ZPmZ_[SD]$",
-                                            "^[SU]XTW_ZPmZ_[D]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]XTB_ZPmZ_[HSD]",
+                                            "^[SU]XTH_ZPmZ_[SD]",
+                                            "^[SU]XTW_ZPmZ_[D]")>;
 
 // Extract
 def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instrs EXT_ZZI, EXT_ZZI_B)>;
 
 // Extract narrow saturating
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]$",
-                                            "^SQXTUN[BT]_ZZ_[BHS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]",
+                                            "^SQXTUN[BT]_ZZ_[BHS]")>;
 
 // Extract/insert operation, SIMD and FP scalar form
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^LAST[AB]_VPZ_[BHSD]$",
-                                            "^INSR_ZV_[BHSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^LAST[AB]_VPZ_[BHSD]",
+                                            "^INSR_ZV_[BHSD]")>;
 
 // Extract/insert operation, scalar
-def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU0>], (instregex "^LAST[AB]_RPZ_[BHSD]$",
-                                                "^INSR_ZR_[BHSD]$")>;
+def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU0>], (instregex "^LAST[AB]_RPZ_[BHSD]",
+                                                "^INSR_ZR_[BHSD]")>;
 
 // Histogram operations
-def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU0>], (instregex "^HISTCNT_ZPzZZ_[SD]$",
-                                                  "^HISTSEG_ZZZ$")>;
+def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU0>], (instregex "^HISTCNT_ZPzZZ_[SD]",
+                                                  "^HISTSEG_ZZZ")>;
 
 // Horizontal operations, B, H, S form, immediate operands only
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_II_[BHS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_II_[BHS]")>;
 
 // Horizontal operations, B, H, S form, scalar, immediate operands/ scalar
 // operands only / immediate, scalar operands
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>;
 
 // Horizontal operations, D form, immediate operands only
 def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs INDEX_II_D)>;
 
 // Horizontal operations, D form, scalar, immediate operands)/ scalar operands
 // only / immediate, scalar operands
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_(IR|RI|RR)_D$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_(IR|RI|RR)_D")>;
 
 // Logical
 def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>],
-             (instregex "^(AND|EOR|ORR)_ZI$",
-                        "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$",
-                        "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>;
+             (instregex "^(AND|EOR|ORR)_ZI",
+                        "^(AND|BIC|EOR|EOR|ORR)_ZZZ",
+                        "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]",
+                        "^(AND|BIC|EOR|NOT|ORR)_ZPZZ_[BHSD]")>;
 
 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
-             (instregex "^EOR(BT|TB)_ZZZ_[BHSD]$")>;
+             (instregex "^EOR(BT|TB)_ZZZ_[BHSD]")>;
 
 // Max/min, basic and pairwise
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]$",
-                                           "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
+                                           "^[SU](MAX|MIN)P?_(ZPmZ|ZPZZ)_[BHSD]")>;
 
 // Matching operations
-def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "^N?MATCH_PPzZZ_[BH]$")>;
+def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "^N?MATCH_PPzZZ_[BH]")>;
 
 // Matrix multiply-accumulate
 def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
 
 // Move prefix
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
-                                           "^MOVPRFX_ZZ$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]",
+                                           "^MOVPRFX_ZZ")>;
 
 // Multiply, B, H, S element size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]$",
-                                            "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ|ZPZZ)_[BHS]",
+                                            "^[SU]MULH_(ZPmZ|ZZZ|ZPZZ)_[BHS]")>;
 
 // Multiply, D element size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D$",
-                                            "^[SU]MULH_(ZPmZ|ZZZ)_D$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ|ZPZZ)_D",
+                                            "^[SU]MULH_(ZPmZ|ZZZ|ZPZZ)_D")>;
 
 // Multiply long
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]MULL[BT]_ZZZI_[SD]$",
-                                            "^[SU]MULL[BT]_ZZZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]MULL[BT]_ZZZI_[SD]",
+                                            "^[SU]MULL[BT]_ZZZ_[HSD]")>;
 
 // Multiply accumulate, B, H, S element size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^ML[AS]_ZZZI_[BHS]$",
-                                            "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^ML[AS]_(ZZZI|ZPZZZ)_[BHS]",
+                                            "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;
 
 // Multiply accumulate, D element size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^ML[AS]_ZZZI_D$",
-                                            "^(ML[AS]|MAD|MSB)_ZPmZZ_D$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^ML[AS]_(ZZZI|ZPZZZ)_D",
+                                            "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>;
 
 // Multiply accumulate long
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$",
-                                            "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]",
+                                            "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>;
 
 // Multiply accumulate saturating doubling long regular
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]$",
-                                            "^SQDML[AS](LB|LT)_ZZZI_[SD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]",
+                                            "^SQDML[AS](LB|LT)_ZZZI_[SD]")>;
 
 // Multiply saturating doubling high, B, H, S element size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDMULH_ZZZ_[BHS]$",
-                                            "^SQDMULH_ZZZI_[HS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDMULH_ZZZ_[BHS]",
+                                            "^SQDMULH_ZZZI_[HS]")>;
 
 // Multiply saturating doubling high, D element size
 def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;
 
 // Multiply saturating doubling long
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDMULL[BT]_ZZZ_[HSD]$",
-                                            "^SQDMULL[BT]_ZZZI_[SD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDMULL[BT]_ZZZ_[HSD]",
+                                            "^SQDMULL[BT]_ZZZI_[SD]")>;
 
 // Multiply saturating rounding doubling regular/complex accumulate, B, H, S
 // element size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDML[AS]H_ZZZ_[BHS]$",
-                                            "^SQRDCMLAH_ZZZ_[BHS]$",
-                                            "^SQRDML[AS]H_ZZZI_[HS]$",
-                                            "^SQRDCMLAH_ZZZI_[HS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDML[AS]H_ZZZ_[BHS]",
+                                            "^SQRDCMLAH_ZZZ_[BHS]",
+                                            "^SQRDML[AS]H_ZZZI_[HS]",
+                                            "^SQRDCMLAH_ZZZI_[HS]")>;
 
 // Multiply saturating rounding doubling regular/complex accumulate, D element
 // size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDML[AS]H_ZZZI?_D$",
-                                            "^SQRDCMLAH_ZZZ_D$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDML[AS]H_ZZZI?_D",
+                                            "^SQRDCMLAH_ZZZ_D")>;
 
 // Multiply saturating rounding doubling regular/complex, B, H, S element size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDMULH_ZZZ_[BHS]$",
-                                            "^SQRDMULH_ZZZI_[HS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDMULH_ZZZ_[BHS]",
+                                            "^SQRDMULH_ZZZI_[HS]")>;
 
 // Multiply saturating rounding doubling regular/complex, D element size
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDMULH_ZZZI?_D$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDMULH_ZZZI?_D")>;
 
 // Multiply/multiply long, (8x8) polynomial
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^PMUL_ZZZ_B$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^PMUL_ZZZ_B")>;
 
-def : InstRW<[CortexA510Write<6, CortexA510UnitVMC>], (instregex "^PMULL[BT]_ZZZ_[HDQ]$")>;
+def : InstRW<[CortexA510Write<6, CortexA510UnitVMC>], (instregex "^PMULL[BT]_ZZZ_[HDQ]")>;
 
 
 // Predicate counting vector
 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
-             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI$")>;
+             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI")>;
 
 // Reciprocal estimate
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs URECPE_ZPmZ_S, URSQRTE_ZPmZ_S)>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>;
 
 // Reduction, arithmetic, B form
 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
@@ -955,43 +960,44 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^[SU](ADD|MA
 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
 
 // Reduction, logical
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]")>;
 
 // Reverse, vector
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^REV_ZZ_[BHSD]$",
-                                           "^REVB_ZPmZ_[HSD]$",
-                                           "^REVH_ZPmZ_[SD]$",
-                                           "^REVW_ZPmZ_D$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^REV_ZZ_[BHSD]",
+                                           "^REVB_ZPmZ_[HSD]",
+                                           "^REVH_ZPmZ_[SD]",
+                                           "^REVW_ZPmZ_D")>;
 
 // Select, vector form
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^SEL_ZPZZ_[BHSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^SEL_ZPZZ_[BHSD]")>;
 
 // Table lookup
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TBL_ZZZZ?_[BHSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TBL_ZZZZ?_[BHSD]")>;
 
 // Table lookup extension
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TBX_ZZZ_[BHSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TBX_ZZZ_[BHSD]")>;
 
 // Transpose, vector form
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>;
 
 // Unpack and extend
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>;
 
 // Zip/unzip
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>;
 
 // SVE floating-point instructions
 // -----------------------------------------------------------------------------
 
 // Floating point absolute value/difference
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FAB[SD]_ZPmZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FAB[SD]_ZPmZ_[HSD]",
+                                                                  "^FAB[SD]_ZPZZ_[HSD]")>;
 
 // Floating point arithmetic
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$",
-                                           "^FADDP_ZPmZZ_[HSD]$",
-                                           "^FNEG_ZPmZ_[HSD]$",
-                                           "^FSUBR_ZPm[IZ]_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ|ZPZI|ZPZZ)_[HSD]",
+                                           "^FADDP_ZPmZZ_[HSD]",
+                                           "^FNEG_ZPmZ_[HSD]",
+                                           "^FSUBR_(ZPm[IZ]|ZPZ[IZ])_[HSD]")>;
 
 // Floating point associative add, F16
 def : InstRW<[CortexA510MCWrite<32, 29, CortexA510UnitVALU>], (instrs FADDA_VPZ_H)>;
@@ -1003,17 +1009,17 @@ def : InstRW<[CortexA510MCWrite<16, 13, CortexA510UnitVALU>], (instrs FADDA_VPZ_
 def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVALU>], (instrs FADDA_VPZ_D)>;
 
 // Floating point compare
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FACG[ET]_PPzZZ_[HSD]$",
-                                            "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$",
-                                            "^FCM(LE|LT)_PPzZ0_[HSD]$",
-                                            "^FCMUO_PPzZZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FACG[ET]_PPzZZ_[HSD]",
+                                            "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]",
+                                            "^FCM(LE|LT)_PPzZ0_[HSD]",
+                                            "^FCMUO_PPzZZ_[HSD]")>;
 
 // Floating point complex add
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCADD_ZPmZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCADD_ZPmZ_[HSD]")>;
 
 // Floating point complex multiply add
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FCMLA_ZPmZZ_[HSD]$",
-                                           "^FCMLA_ZZZI_[HS]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FCMLA_ZPmZZ_[HSD]",
+                                           "^FCMLA_ZZZI_[HS]")>;
 
 // Floating point convert, long or narrow (F16 to F32 or F32 to F16)
 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
@@ -1030,13 +1036,13 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCVT_ZPmZ_(H
 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCVTX_ZPmZ_DtoS", "FCVTXNT_ZPmZ_DtoS")>;
 
 // Floating point base2 log, F16
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs FLOGB_ZPmZ_H)>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>;
 
 // Floating point base2 log, F32
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs FLOGB_ZPmZ_S)>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>;
 
 // Floating point base2 log, F64
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs FLOGB_ZPmZ_D)>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>;
 
 // Floating point convert to integer, F16
 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;
@@ -1049,93 +1055,92 @@ def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
              (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;
 
 // Floating point copy
-def : InstRW<[CortexA510Write<3, CortexA510UnitVALU0>], (instregex "^FCPY_ZPmI_[HSD]$",
-                                           "^FDUP_ZI_[HSD]$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU0>], (instregex "^FCPY_ZPmI_[HSD]",
+                                           "^FDUP_ZI_[HSD]")>;
 
 // Floating point divide, F16
-def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVMC>], (instregex "^FDIVR?_ZPmZ_H$")>;
+def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVMC>], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
 
 // Floating point divide, F32
-def : InstRW<[CortexA510MCWrite<13, 10, CortexA510UnitVMC>], (instregex "^FDIVR?_ZPmZ_S$")>;
+def : InstRW<[CortexA510MCWrite<13, 10, CortexA510UnitVMC>], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
 
 // Floating point divide, F64
-def : InstRW<[CortexA510MCWrite<22, 19, CortexA510UnitVMC>], (instregex "^FDIVR?_ZPmZ_D$")>;
+def : InstRW<[CortexA510MCWrite<22, 19, CortexA510UnitVMC>], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
 
 // Floating point min/max pairwise
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;
 
 // Floating point min/max
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(MAX|MIN)(NM)?_(ZPm[IZ]|ZPZZ|ZPZI)_[HSD]")>;
 
 // Floating point multiply
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]$",
-                                           "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^(FSCALE|FMULX)_(ZPmZ|ZPZZ)_[HSD]",
+                                           "^FMUL_(ZPm[IZ]|ZZZI?|ZPZI|ZPZZ)_[HSD]")>;
 
 // Floating point multiply accumulate
 def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>],
-             (instregex "^FML[AS]_(ZPmZZ|ZZZI)_[HSD]$",
-                        "^(FMAD|FNMAD|FNML[AS]|FN?MSB)_ZPmZZ_[HSD]$")>;
+             (instregex "^FML[AS]_(ZPmZZ|ZZZI|ZPZZZ)_[HSD]",
+                        "^(FMAD|FNMAD|FNML[AS]|FN?MSB)_(ZPmZZ|ZPZZZ)_[HSD]")>;
 
 // Floating point multiply add/sub accumulate long
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>;
 
 // Floating point reciprocal estimate, F16
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs FRECPE_ZZ_H, FRECPX_ZPmZ_H,
-                                         FRSQRTE_ZZ_H)>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FRECPE_ZZ_H", "^FRECPX_ZPmZ_H",
+                                         "^FRSQRTE_ZZ_H")>;
 
 // Floating point reciprocal estimate, F32
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs FRECPE_ZZ_S, FRECPX_ZPmZ_S,
-                                         FRSQRTE_ZZ_S)>;
-
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FRECPE_ZZ_S", "^FRECPX_ZPmZ_S",
+                                         "^FRSQRTE_ZZ_S")>;
 // Floating point reciprocal estimate, F64
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs FRECPE_ZZ_D, FRECPX_ZPmZ_D,
-                                         FRSQRTE_ZZ_D)>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>],(instregex "^FRECPE_ZZ_D", "^FRECPX_ZPmZ_D",
+                                         "^FRSQRTE_ZZ_D")>;
 
 // Floating point reciprocal step
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;
 
 // Floating point reduction, F16
 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>],
-             (instregex "^(FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_[HSD]$")>;
+             (instregex "^(FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_[HSD]")>;
 
 // Floating point reduction, F32
 def : InstRW<[CortexA510MCWrite<12, 11, CortexA510UnitVALU0>],
-             (instregex "^FADDV_VPZ_H$")>;
+             (instregex "^FADDV_VPZ_H")>;
 
 def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVALU0>],
-             (instregex "^FADDV_VPZ_S$")>;
+             (instregex "^FADDV_VPZ_S")>;
 
 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>],
-             (instregex "^FADDV_VPZ_D$")>;
+             (instregex "^FADDV_VPZ_D")>;
 
 
 // Floating point round to integral, F16
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
 
 // Floating point round to integral, F32
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
 
 // Floating point round to integral, F64
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
 
 // Floating point square root, F16
-def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVMC>], (instrs FSQRT_ZPmZ_H)>;
+def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVMC>], (instregex "^FSQRT_ZPmZ_H")>;
 
 // Floating point square root, F32
-def : InstRW<[CortexA510MCWrite<12, 9, CortexA510UnitVMC>], (instrs FSQRT_ZPmZ_S)>;
+def : InstRW<[CortexA510MCWrite<12, 9, CortexA510UnitVMC>], (instregex "^FSQRT_ZPmZ_S")>;
 
 // Floating point square root, F64
-def : InstRW<[CortexA510MCWrite<22, 19, CortexA510UnitVMC>], (instrs FSQRT_ZPmZ_D)>;
+def : InstRW<[CortexA510MCWrite<22, 19, CortexA510UnitVMC>], (instregex "^FSQRT_ZPmZ_D")>;
 
 // Floating point trigonometric exponentiation
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FEXPA_ZZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FEXPA_ZZ_[HSD]")>;
 
 // Floating point trigonometric multiply add
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FTMAD_ZZI_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FTMAD_ZZI_[HSD]")>;
 
 // Floating point trigonometric, miscellaneous
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FTSMUL_ZZZ_[HSD]$")>;
-def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FTSSEL_ZZZ_[HSD]$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FTSMUL_ZZZ_[HSD]")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FTSSEL_ZZZ_[HSD]")>;
 
 
 // SVE BFloat16 (BF16) instructions
@@ -1151,7 +1156,7 @@ def : InstRW<[A510Write_10cyc_1VMAC_1VALU], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
 def : InstRW<[A510Write_15cyc_1VMAC_1VALU], (instrs BFMMLA_ZZZ)>;
 
 // Multiply accumulate long
-def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^BFMLAL[BT]_ZZZ(I)?")>;
 
 // SVE Load instructions
 // -----------------------------------------------------------------------------

diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index 0901290bff3c1b..fd7be6ea61693b 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -1998,174 +1998,172 @@ def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
 
 // Loop control, based on GPR
 def : InstRW<[V2Write_3cyc_2M],
-             (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;
-def : InstRW<[V2Write_3cyc_2M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>;
+             (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>;
+def : InstRW<[V2Write_3cyc_2M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>;
 
 // Loop terminate
-def : InstRW<[V2Write_1cyc_2M], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;
+def : InstRW<[V2Write_1cyc_2M], (instregex "^CTERM(EQ|NE)_(WW|XX)")>;
 
 // Predicate counting scalar
 def : InstRW<[V2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
 def : InstRW<[V2Write_2cyc_1M],
-             (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI$",
-                        "^SQ(DEC|INC)[BHWD]_XPiWdI$",
-                        "^UQ(DEC|INC)[BHWD]_WPiI$")>;
+             (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI",
+                        "^SQ(DEC|INC)[BHWD]_XPiWdI",
+                        "^UQ(DEC|INC)[BHWD]_WPiI")>;
 
 // Predicate counting scalar, ALL, {1,2,4}
-def : InstRW<[V2Write_IncDec], (instregex "^(DEC|INC)[BHWD]_XPiI$")>;
+def : InstRW<[V2Write_IncDec], (instregex "^(DEC|INC)[BHWD]_XPiI")>;
 
 // Predicate counting scalar, active predicate
 def : InstRW<[V2Write_2cyc_1M],
-             (instregex "^CNTP_XPP_[BHSD]$",
-                        "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$",
-                        "^(UQDEC|UQINC)P_WP_[BHSD]$",
-                        "^(SQDEC|SQINC)P_XPWd_[BHSD]$")>;
+             (instregex "^CNTP_XPP_[BHSD]",
+                        "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]",
+                        "^(UQDEC|UQINC)P_WP_[BHSD]",
+                        "^(SQDEC|SQINC)P_XPWd_[BHSD]")>;
 
 // Predicate counting vector, active predicate
 def : InstRW<[V2Write_7cyc_1M_1M0_1V],
-             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>;
+             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>;
 
 // Predicate logical
 def : InstRW<[V2Write_1or2cyc_1M0],
-             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;
+             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>;
 
 // Predicate logical, flag setting
 def : InstRW<[V2Write_1or2cyc_1M0_1M],
-             (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>;
+             (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>;
 
 // Predicate reverse
-def : InstRW<[V2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]")>;
 
 // Predicate select
 def : InstRW<[V2Write_1cyc_1M0], (instrs SEL_PPPP)>;
 
 // Predicate set
-def : InstRW<[V2Write_2cyc_1M], (instregex "^PFALSE$", "^PTRUE_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1M], (instregex "^PFALSE", "^PTRUE_[BHSD]")>;
 
 // Predicate set/initialize, set flags
-def : InstRW<[V2Write_3cyc_2M], (instregex "^PTRUES_[BHSD]$")>;
+def : InstRW<[V2Write_3cyc_2M], (instregex "^PTRUES_[BHSD]")>;
 
 // Predicate find first/next
-def : InstRW<[V2Write_2cyc_1M], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1M], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>;
 
 // Predicate test
 def : InstRW<[V2Write_1cyc_1M], (instrs PTEST_PP)>;
 
 // Predicate transpose
-def : InstRW<[V2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSD]")>;
 
 // Predicate unpack and widen
 def : InstRW<[V2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>;
 
 // Predicate zip/unzip
-def : InstRW<[V2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]")>;
 
 // SVE integer instructions
 // -----------------------------------------------------------------------------
 
 // Arithmetic, absolute diff
-def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]$",
-                                           "^[SU]ABD_ZPZZ_[BHSD]_UNDEF$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]",
+                                           "^[SU]ABD_ZPZZ_[BHSD]")>;
 
 // Arithmetic, absolute diff accum
-def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>;
+def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABA_ZZZ_[BHSD]")>;
 
 // Arithmetic, absolute diff accum long
-def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>;
+def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>;
 
 // Arithmetic, absolute diff long
-def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>;
 
 // Arithmetic, basic
 def : InstRW<[V2Write_2cyc_1V],
-             (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
-                        "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]_UNDEF$",
-                        "^(ADD|SUB)_ZZZ_[BHSD]$",
-                        "^(ADD|SUB|SUBR)_ZI_[BHSD]$",
-                        "^ADR_[SU]XTW_ZZZ_D_[0123]$",
-                        "^ADR_LSL_ZZZ_[SD]_[0123]$",
-                        "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]$",
-                        "^SADDLBT_ZZZ_[HSD]$",
-                        "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]$",
-                        "^SSUBL(BT|TB)_ZZZ_[HSD]$")>;
+             (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
+                        "^(ADD|SUB)_ZZZ_[BHSD]",
+                        "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]",
+                        "^(ADD|SUB|SUBR)_ZI_[BHSD]",
+                        "^ADR_[SU]XTW_ZZZ_D_[0123]",
+                        "^ADR_LSL_ZZZ_[SD]_[0123]",
+                        "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
+                        "^SADDLBT_ZZZ_[HSD]",
+                        "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
+                        "^SSUBL(BT|TB)_ZZZ_[HSD]")>;
 
 // Arithmetic, complex
 def : InstRW<[V2Write_2cyc_1V],
-             (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]$",
-                        "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
-                        "^SQ(ABS|NEG)_ZPmZ_[BHSD]_UNDEF$",
-                        "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]$",
-                        "^[SU]Q(ADD|SUB)_ZI_[BHSD]$",
-                        "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]$",
-                        "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]$")>;
+             (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
+                        "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
+                        "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
+                        "^[SU]Q(ADD|SUB)_ZI_[BHSD]",
+                        "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
+                        "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;
 
 // Arithmetic, large integer
-def : InstRW<[V2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>;
 
 // Arithmetic, pairwise add
-def : InstRW<[V2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]")>;
 
 // Arithmetic, pairwise add and accum long
 def : InstRW<[V2Wr_ZPA, ReadDefault, V2Rd_ZPA],
-             (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>;
+             (instregex "^[SU]ADALP_ZPmZ_[HSD]")>;
 
 // Arithmetic, shift
 def : InstRW<[V2Write_2cyc_1V13],
-             (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]$",
-                        "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]$",
-                        "^(ASR|LSL|LSR)_ZPmI_[BHSD]$",
-                        "^(ASR|LSL|LSR)_ZPmZ_[BHSD]$",
-                        "^(ASR|LSL|LSR)_ZZI_[BHSD]$",
-                        "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]_UNDEF$",
-                        "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]$")>;
+             (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]",
+                        "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]",
+                        "^(ASR|LSL|LSR)_ZPmI_[BHSD]",
+                        "^(ASR|LSL|LSR)_ZPmZ_[BHSD]",
+                        "^(ASR|LSL|LSR)_ZZI_[BHSD]",
+                        "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
+                        "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
 
 // Arithmetic, shift and accumulate
-def : InstRW<[V2Wr_ZSA, V2Rd_ZSA], (instregex "^[SU]R?SRA_ZZI_[BHSD]$")>;
+def : InstRW<[V2Wr_ZSA, V2Rd_ZSA], (instregex "^[SU]R?SRA_ZZI_[BHSD]")>;
 
 // Arithmetic, shift by immediate
-def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHRN[BT]_ZZI_[BHS]$",
-                                             "^[SU]SHLL[BT]_ZZI_[HSD]$")>;
+def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHRN[BT]_ZZI_[BHS]",
+                                             "^[SU]SHLL[BT]_ZZI_[HSD]")>;
 
 // Arithmetic, shift by immediate and insert
-def : InstRW<[V2Write_2cyc_1V13], (instregex "^(SLI|SRI)_ZZI_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1V13], (instregex "^(SLI|SRI)_ZZI_[BHSD]")>;
 
 // Arithmetic, shift complex
 def : InstRW<[V2Write_4cyc_1V13],
-             (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]$",
-                        "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]$",
-                        "^[SU]QR?SHL_ZPZZ_[BHSD]_UNDEF$",
-                        "^(SQSHL|SQSHLU|UQSHL)_ZPmI_[BHSD]$",
-                        "^SQSHRU?N[BT]_ZZI_[BHS]$",
-                        "^UQR?SHRN[BT]_ZZI_[BHS]$")>;
+             (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]",
+                        "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]",
+                        "^[SU]QR?SHL_ZPZZ_[BHSD]",
+                        "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]",
+                        "^SQSHRU?N[BT]_ZZI_[BHS]",
+                        "^UQR?SHRN[BT]_ZZI_[BHS]")>;
 
 // Arithmetic, shift right for divide
-def : InstRW<[V2Write_4cyc_1V13], (instregex "^ASRD_ZPmI_[BHSD]$")>;
+def : InstRW<[V2Write_4cyc_1V13], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;
 
 // Arithmetic, shift rounding
-def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]$",
-                                             "^[SU]RSHL_ZPZZ_[BHSD]_UNDEF$",
-                                             "^[SU]RSHR_ZPmI_[BHSD]$")>;
+def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]",
+                                             "^[SU]RSHL_ZPZZ_[BHSD]",
+                                             "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>;
 
 // Bit manipulation
-def : InstRW<[V2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]$")>;
+def : InstRW<[V2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>;
 
 // Bitwise select
-def : InstRW<[V2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>;
 
 // Count/reverse bits
-def : InstRW<[V2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$",
-                                           "^(CLS|CLZ|CNT)_ZPmZ_[BHSD]_UNDEF$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;
 
 // Broadcast logical bitmask immediate to vector
 def : InstRW<[V2Write_2cyc_1V], (instrs DUPM_ZI)>;
 
 // Compare and set flags
 def : InstRW<[V2Write_4or5cyc_1V0_1M0],
-             (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
-                        "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;
+             (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]",
+                        "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>;
 
 // Complex add
-def : InstRW<[V2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]")>;
 
 // Complex dot product 8-bit element
 def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
@@ -2174,201 +2172,200 @@ def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
 def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;
 
 // Complex multiply-add B, H, S element size
-def : InstRW<[V2Wr_ZCMABHS, V2Rd_ZCMABHS], (instregex "^CMLA_ZZZ_[BHS]$",
-                                                      "^CMLA_ZZZI_[HS]$")>;
+def : InstRW<[V2Wr_ZCMABHS, V2Rd_ZCMABHS], (instregex "^CMLA_ZZZ_[BHS]",
+                                                      "^CMLA_ZZZI_[HS]")>;
 
 // Complex multiply-add D element size
 def : InstRW<[V2Wr_ZCMAD, V2Rd_ZCMAD], (instrs CMLA_ZZZ_D)>;
 
 // Conditional extract operations, scalar form
-def : InstRW<[V2Write_8cyc_1M0_1V01], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;
+def : InstRW<[V2Write_8cyc_1M0_1V01], (instregex "^CLAST[AB]_RPZ_[BHSD]")>;
 
 // Conditional extract operations, SIMD&FP scalar and vector forms
-def : InstRW<[V2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
-                                            "^COMPACT_ZPZ_[SD]$",
-                                            "^SPLICE_ZPZZ?_[BHSD]$")>;
+def : InstRW<[V2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]",
+                                            "^COMPACT_ZPZ_[SD]",
+                                            "^SPLICE_ZPZZ?_[BHSD]")>;
 
 // Convert to floating point, 64b to float or convert to double
-def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD](_UNDEF)?$",
-                                             "^[SU]CVTF_ZPmZ_StoD(_UNDEF)?$")>;
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
+                                             "^[SU]CVTF_ZPmZ_StoD")>;
 
 // Convert to floating point, 32b to single or half
-def : InstRW<[V2Write_4cyc_2V02], (instregex "^[SU]CVTF_ZPmZ_Sto[HS](_UNDEF)?$")>;
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;
 
 // Convert to floating point, 16b to half
-def : InstRW<[V2Write_6cyc_4V02], (instregex "^[SU]CVTF_ZPmZ_HtoH(_UNDEF)?$")>;
+def : InstRW<[V2Write_6cyc_4V02], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
 
 // Copy, scalar
-def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]$")>;
+def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]")>;
 
 // Copy, scalar SIMD&FP or imm
-def : InstRW<[V2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]$",
-                                           "^CPY_ZPzI_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]",
+                                           "^CPY_ZPzI_[BHSD]")>;
 
 // Divides, 32 bit
-def : InstRW<[V2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$",
-                                             "^[SU]DIV_ZPZZ_S_UNDEF$")>;
+def : InstRW<[V2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S",
+                                             "^[SU]DIV_ZPZZ_S")>;
 
 // Divides, 64 bit
-def : InstRW<[V2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$",
-                                             "^[SU]DIV_ZPZZ_D_UNDEF$")>;
+def : InstRW<[V2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
+                                             "^[SU]DIV_ZPZZ_D")>;
 
 // Dot product, 8 bit
-def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S$")>;
+def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S")>;
 
 // Dot product, 8 bit, using signed and unsigned integers
 def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
 
 // Dot product, 16 bit
-def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D$")>;
+def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D")>;
 
 // Duplicate, immediate and indexed form
-def : InstRW<[V2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]$",
-                                           "^DUP_ZZI_[BHSDQ]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]",
+                                           "^DUP_ZZI_[BHSDQ]")>;
 
 // Duplicate, scalar form
-def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]$")>;
+def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]")>;
 
 // Extend, sign or zero
-def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]XTB_ZPmZ(_UNDEF)?_[HSD]$",
-                                             "^[SU]XTH_ZPmZ(_UNDEF)?_[SD]$",
-                                             "^[SU]XTW_ZPmZ(_UNDEF)?_[D]$")>;
+def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]XTB_ZPmZ_[HSD]",
+                                             "^[SU]XTH_ZPmZ_[SD]",
+                                             "^[SU]XTW_ZPmZ_[D]")>;
 
 // Extract
 def : InstRW<[V2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>;
 
 // Extract narrow saturating
-def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]$",
-                                             "^SQXTUN[BT]_ZZ_[BHS]$")>;
+def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]",
+                                             "^SQXTUN[BT]_ZZ_[BHS]")>;
 
 // Extract/insert operation, SIMD and FP scalar form
-def : InstRW<[V2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$",
-                                            "^INSR_ZV_[BHSD]$")>;
+def : InstRW<[V2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]",
+                                            "^INSR_ZV_[BHSD]")>;
 
 // Extract/insert operation, scalar
-def : InstRW<[V2Write_6cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]$",
-                                                "^INSR_ZR_[BHSD]$")>;
+def : InstRW<[V2Write_6cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]",
+                                                "^INSR_ZR_[BHSD]")>;
 
 // Histogram operations
-def : InstRW<[V2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]$",
-                                           "^HISTSEG_ZZZ$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]",
+                                           "^HISTSEG_ZZZ")>;
 
 // Horizontal operations, B, H, S form, immediate operands only
-def : InstRW<[V2Write_4cyc_1V02], (instregex "^INDEX_II_[BHS]$")>;
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^INDEX_II_[BHS]")>;
 
 // Horizontal operations, B, H, S form, scalar, immediate operands/ scalar
 // operands only / immediate, scalar operands
-def : InstRW<[V2Write_7cyc_1M0_1V02], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;
+def : InstRW<[V2Write_7cyc_1M0_1V02], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>;
 
 // Horizontal operations, D form, immediate operands only
 def : InstRW<[V2Write_5cyc_2V02], (instrs INDEX_II_D)>;
 
 // Horizontal operations, D form, scalar, immediate operands)/ scalar operands
 // only / immediate, scalar operands
-def : InstRW<[V2Write_8cyc_2M0_2V02], (instregex "^INDEX_(IR|RI|RR)_D$")>;
+def : InstRW<[V2Write_8cyc_2M0_2V02], (instregex "^INDEX_(IR|RI|RR)_D")>;
 
 // Logical
 def : InstRW<[V2Write_2cyc_1V],
-             (instregex "^(AND|EOR|ORR)_ZI$",
-                        "^(AND|BIC|EOR|ORR)_ZZZ$",
-                        "^EOR(BT|TB)_ZZZ_[BHSD]$",
-                        "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$",
-                        "^NOT_ZPmZ_[BHSD]_UNDEF$")>;
+             (instregex "^(AND|EOR|ORR)_ZI",
+                        "^(AND|BIC|EOR|ORR)_ZZZ",
+                        "^EOR(BT|TB)_ZZZ_[BHSD]",
+                        "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]",
+                        "^NOT_ZPmZ_[BHSD]")>;
 
 // Max/min, basic and pairwise
-def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]$",
-                                           "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]$",
-                                           "^[SU](MAX|MIN)_ZPZZ_[BHSD]_UNDEF$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
+                                           "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]",
+                                           "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>;
 
 // Matching operations
 // FIXME: SOG p. 44, n. 5: If the consuming instruction has a flag source, the
 // latency for this instruction is 4 cycles.
-def : InstRW<[V2Write_2or3cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]$")>;
+def : InstRW<[V2Write_2or3cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]")>;
 
 // Matrix multiply-accumulate
 def : InstRW<[V2Wr_ZMMA, V2Rd_ZMMA], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
 
 // Move prefix
-def : InstRW<[V2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
-                                           "^MOVPRFX_ZZ$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]",
+                                           "^MOVPRFX_ZZ")>;
 
 // Multiply, B, H, S element size
-def : InstRW<[V2Write_4cyc_1V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]$",
-                                             "^MUL_ZPZZ_[BHS]_UNDEF$",
-                                             "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$",
-                                             "^[SU]MULH_ZPZZ_[BHS]_UNDEF$")>;
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
+                                             "^MUL_ZPZZ_[BHS]",
+                                             "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
+                                             "^[SU]MULH_ZPZZ_[BHS]")>;
 
 // Multiply, D element size
-def : InstRW<[V2Write_5cyc_2V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D$",
-                                             "^MUL_ZPZZ_D_UNDEF$",
-                                             "^[SU]MULH_(ZPmZ|ZZZ)_D$",
-                                             "^[SU]MULH_ZPZZ_D_UNDEF$")>;
+def : InstRW<[V2Write_5cyc_2V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
+                                             "^MUL_ZPZZ_D",
+                                             "^[SU]MULH_(ZPmZ|ZZZ)_D",
+                                             "^[SU]MULH_ZPZZ_D")>;
 
 // Multiply long
-def : InstRW<[V2Write_4cyc_1V02], (instregex "^[SU]MULL[BT]_ZZZI_[SD]$",
-                                             "^[SU]MULL[BT]_ZZZ_[HSD]$")>;
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^[SU]MULL[BT]_ZZZI_[SD]",
+                                             "^[SU]MULL[BT]_ZZZ_[HSD]")>;
 
 // Multiply accumulate, B, H, S element size
 def : InstRW<[V2Wr_ZMABHS, V2Rd_ZMABHS],
-             (instregex "^ML[AS]_ZZZI_[HS]$", "^ML[AS]_ZPZZZ_[BHS]_UNDEF$")>;
+             (instregex "^ML[AS]_ZZZI_[HS]", "^ML[AS]_ZPZZZ_[BHS]")>;
 def : InstRW<[V2Wr_ZMABHS, ReadDefault, V2Rd_ZMABHS],
-             (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]$")>;
+             (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;
 
 // Multiply accumulate, D element size
 def : InstRW<[V2Wr_ZMAD, V2Rd_ZMAD],
-             (instregex "^ML[AS]_ZZZI_D$", "^ML[AS]_ZPZZZ_D_UNDEF$")>;
+             (instregex "^ML[AS]_ZZZI_D", "^ML[AS]_ZPZZZ_D")>;
 def : InstRW<[V2Wr_ZMAD, ReadDefault, V2Rd_ZMAD],
-             (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D$")>;
+             (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>;
 
 // Multiply accumulate long
-def : InstRW<[V2Wr_ZMAL, V2Rd_ZMAL], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$",
-                                                "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>;
+def : InstRW<[V2Wr_ZMAL, V2Rd_ZMAL], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]",
+                                                "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>;
 
 // Multiply accumulate saturating doubling long regular
 def : InstRW<[V2Wr_ZMASQL, V2Rd_ZMASQ],
-             (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]$",
-                        "^SQDML[AS]L[BT]_ZZZI_[SD]$")>;
+             (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]",
+                        "^SQDML[AS]L[BT]_ZZZI_[SD]")>;
 
 // Multiply saturating doubling high, B, H, S element size
-def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULH_ZZZ_[BHS]$",
-                                             "^SQDMULH_ZZZI_[HS]$")>;
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULH_ZZZ_[BHS]",
+                                             "^SQDMULH_ZZZI_[HS]")>;
 
 // Multiply saturating doubling high, D element size
 def : InstRW<[V2Write_5cyc_2V02], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;
 
 // Multiply saturating doubling long
-def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULL[BT]_ZZZ_[HSD]$",
-                                             "^SQDMULL[BT]_ZZZI_[SD]$")>;
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULL[BT]_ZZZ_[HSD]",
+                                             "^SQDMULL[BT]_ZZZI_[SD]")>;
 
 // Multiply saturating rounding doubling regular/complex accumulate, B, H, S
 // element size
-def : InstRW<[V2Wr_ZMASQBHS, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]$",
-                                                     "^SQRDCMLAH_ZZZ_[BHS]$",
-                                                     "^SQRDML[AS]H_ZZZI_[HS]$",
-                                                     "^SQRDCMLAH_ZZZI_[HS]$")>;
+def : InstRW<[V2Wr_ZMASQBHS, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]",
+                                                     "^SQRDCMLAH_ZZZ_[BHS]",
+                                                     "^SQRDML[AS]H_ZZZI_[HS]",
+                                                     "^SQRDCMLAH_ZZZI_[HS]")>;
 
 // Multiply saturating rounding doubling regular/complex accumulate, D element
 // size
-def : InstRW<[V2Wr_ZMASQD, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZI?_D$",
-                                                   "^SQRDCMLAH_ZZZ_D$")>;
+def : InstRW<[V2Wr_ZMASQD, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZI?_D",
+                                                   "^SQRDCMLAH_ZZZ_D")>;
 
 // Multiply saturating rounding doubling regular/complex, B, H, S element size
-def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQRDMULH_ZZZ_[BHS]$",
-                                             "^SQRDMULH_ZZZI_[HS]$")>;
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQRDMULH_ZZZ_[BHS]",
+                                             "^SQRDMULH_ZZZI_[HS]")>;
 
 // Multiply saturating rounding doubling regular/complex, D element size
-def : InstRW<[V2Write_5cyc_2V02], (instregex "^SQRDMULH_ZZZI?_D$")>;
+def : InstRW<[V2Write_5cyc_2V02], (instregex "^SQRDMULH_ZZZI?_D")>;
 
 // Multiply/multiply long, (8x8) polynomial
-def : InstRW<[V2Write_2cyc_1V23], (instregex "^PMUL_ZZZ_B$",
-                                             "^PMULL[BT]_ZZZ_[HDQ]$")>;
+def : InstRW<[V2Write_2cyc_1V23], (instregex "^PMUL_ZZZ_B",
+                                             "^PMULL[BT]_ZZZ_[HDQ]")>;
 
 // Predicate counting vector
-def : InstRW<[V2Write_2cyc_1V], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI")>;
 
 // Reciprocal estimate
-def : InstRW<[V2Write_4cyc_2V02], (instrs URECPE_ZPmZ_S, URSQRTE_ZPmZ_S,
-                                          URECPE_ZPmZ_S_UNDEF, URSQRTE_ZPmZ_S_UNDEF)>;
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>;
 
 // Reduction, arithmetic, B form
 def : InstRW<[V2Write_9cyc_2V_4V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
@@ -2383,47 +2380,47 @@ def : InstRW<[V2Write_6cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
 def : InstRW<[V2Write_4cyc_2V], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
 
 // Reduction, logical
-def : InstRW<[V2Write_6cyc_1V_1V13], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]$")>;
+def : InstRW<[V2Write_6cyc_1V_1V13], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]")>;
 
 // Reverse, vector
-def : InstRW<[V2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]$",
-                                           "^REVB_ZPmZ_[HSD]$",
-                                           "^REVH_ZPmZ_[SD]$",
-                                           "^REVW_ZPmZ_D$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]",
+                                           "^REVB_ZPmZ_[HSD]",
+                                           "^REVH_ZPmZ_[SD]",
+                                           "^REVW_ZPmZ_D")>;
 
 // Select, vector form
-def : InstRW<[V2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]")>;
 
 // Table lookup
-def : InstRW<[V2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]")>;
 
 // Table lookup extension
-def : InstRW<[V2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]")>;
 
 // Transpose, vector form
-def : InstRW<[V2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>;
 
 // Unpack and extend
-def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>;
 
 // Zip/unzip
-def : InstRW<[V2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>;
 
 // SVE floating-point instructions
 // -----------------------------------------------------------------------------
 
 // Floating point absolute value/difference
-def : InstRW<[V2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]$",
-                                           "^FABD_ZPZZ_[HSD]_UNDEF$",
-                                           "^FABS_ZPmZ_[HSD]_UNDEF$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]",
+                                           "^FABD_ZPZZ_[HSD]",
+                                           "^FABS_ZPmZ_[HSD]")>;
 
 // Floating point arithmetic
-def : InstRW<[V2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$",
-                                           "^F(ADD|SUB)_ZPZ[IZ]_[HSD]_UNDEF$",
-                                           "^FADDP_ZPmZZ_[HSD]$",
-                                           "^FNEG_ZPmZ(_UNDEF)?_[HSD]$",
-                                           "^FSUBR_ZPm[IZ]_[HSD]$",
-                                           "^FSUBR_ZPZI_[HSD]_UNDEF$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
+                                           "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
+                                           "^FADDP_ZPmZZ_[HSD]",
+                                           "^FNEG_ZPmZ_[HSD]",
+                                           "^FSUBR_ZPm[IZ]_[HSD]",
+                                           "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;
 
 // Floating point associative add, F16
 def : InstRW<[V2Write_10cyc_1V1_9rc], (instrs FADDA_VPZ_H)>;
@@ -2435,144 +2432,138 @@ def : InstRW<[V2Write_6cyc_1V1_5rc], (instrs FADDA_VPZ_S)>;
 def : InstRW<[V2Write_4cyc_1V], (instrs FADDA_VPZ_D)>;
 
 // Floating point compare
-def : InstRW<[V2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]$",
-                                            "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$",
-                                            "^FCM(LE|LT)_PPzZ0_[HSD]$",
-                                            "^FCMUO_PPzZZ_[HSD]$")>;
+def : InstRW<[V2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]",
+                                            "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]",
+                                            "^FCM(LE|LT)_PPzZ0_[HSD]",
+                                            "^FCMUO_PPzZZ_[HSD]")>;
 
 // Floating point complex add
-def : InstRW<[V2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]$")>;
+def : InstRW<[V2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]")>;
 
 // Floating point complex multiply add
-def : InstRW<[V2Wr_ZFCMA, ReadDefault, V2Rd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]$")>;
-def : InstRW<[V2Wr_ZFCMA, V2Rd_ZFCMA],              (instregex "^FCMLA_ZZZI_[HS]$")>;
+def : InstRW<[V2Wr_ZFCMA, ReadDefault, V2Rd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]")>;
+def : InstRW<[V2Wr_ZFCMA, V2Rd_ZFCMA],              (instregex "^FCMLA_ZZZI_[HS]")>;
 
 // Floating point convert, long or narrow (F16 to F32 or F32 to F16)
-def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT_ZPmZ_(HtoS|StoH)(_UNDEF)?$",
-                                             "^FCVTLT_ZPmZ_HtoS$",
-                                             "^FCVTNT_ZPmZ_StoH$")>;
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
+                                             "^FCVTLT_ZPmZ_HtoS",
+                                             "^FCVTNT_ZPmZ_StoH")>;
 
 // Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
 // or F64 to F16)
-def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)(_UNDEF)?$",
-                                             "^FCVTLT_ZPmZ_StoD$",
-                                             "^FCVTNT_ZPmZ_DtoS$")>;
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
+                                             "^FCVTLT_ZPmZ_StoD",
+                                             "^FCVTNT_ZPmZ_DtoS")>;
 
 // Floating point convert, round to odd
 def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;
 
 // Floating point base2 log, F16
-def : InstRW<[V2Write_6cyc_4V02], (instrs FLOGB_ZPmZ_H)>;
+def : InstRW<[V2Write_6cyc_4V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>;
 
 // Floating point base2 log, F32
-def : InstRW<[V2Write_4cyc_2V02], (instrs FLOGB_ZPmZ_S)>;
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>;
 
 // Floating point base2 log, F64
-def : InstRW<[V2Write_3cyc_1V02], (instrs FLOGB_ZPmZ_D)>;
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>;
 
 // Floating point convert to integer, F16
-def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVTZ[SU]_ZPmZ_HtoH(_UNDEF)?$")>;
+def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;
 
 // Floating point convert to integer, F32
-def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)(_UNDEF)?$")>;
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;
 
 // Floating point convert to integer, F64
 def : InstRW<[V2Write_3cyc_1V02],
-             (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)(_UNDEF)?$")>;
+             (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;
 
 // Floating point copy
-def : InstRW<[V2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]$",
-                                           "^FDUP_ZI_[HSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]",
+                                           "^FDUP_ZI_[HSD]")>;
 
 // Floating point divide, F16
-def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FDIVR?_ZPmZ_H$",
-                                                   "^FDIV_ZPZZ_H_UNDEF$")>;
+def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
 
 // Floating point divide, F32
-def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FDIVR?_ZPmZ_S$",
-                                                  "^FDIV_ZPZZ_S_UNDEF$")>;
+def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
 
 // Floating point divide, F64
-def : InstRW<[V2Write_15cyc_1V02_14rc], (instregex "^FDIVR?_ZPmZ_D$",
-                                                   "^FDIV_ZPZZ_D_UNDEF$")>;
+def : InstRW<[V2Write_15cyc_1V02_14rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
 
 // Floating point min/max pairwise
-def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;
 
 // Floating point min/max
-def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$",
-                                           "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]_UNDEF$")>;
+def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
+                                           "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;
 
 // Floating point multiply
-def : InstRW<[V2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]$",
-                                           "^FMULX_ZPZZ_[HSD]_UNDEF$",
-                                           "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$",
-                                           "^FMUL_ZPZ[IZ]_[HSD]_UNDEF$")>;
+def : InstRW<[V2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
+                                           "^FMULX_ZPZZ_[HSD]",
+                                           "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
+                                           "^FMUL_ZPZ[IZ]_[HSD]")>;
 
 // Floating point multiply accumulate
 def : InstRW<[V2Wr_ZFMA, ReadDefault, V2Rd_ZFMA],
-             (instregex "^FN?ML[AS]_ZPmZZ_[HSD]$",
-                        "^FN?(MAD|MSB)_ZPmZZ_[HSD]$")>;
+             (instregex "^FN?ML[AS]_ZPmZZ_[HSD]",
+                        "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>;
 def : InstRW<[V2Wr_ZFMA, V2Rd_ZFMA],
-             (instregex "^FML[AS]_ZZZI_[HSD]$",
-                        "^FN?ML[AS]_ZPZZZ_[HSD]_UNDEF$")>;
+             (instregex "^FML[AS]_ZZZI_[HSD]",
+                        "^FN?ML[AS]_ZPZZZ_[HSD]")>;
 
 // Floating point multiply add/sub accumulate long
-def : InstRW<[V2Wr_ZFMAL, V2Rd_ZFMAL], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>;
+def : InstRW<[V2Wr_ZFMAL, V2Rd_ZFMAL], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>;
 
 // Floating point reciprocal estimate, F16
-def : InstRW<[V2Write_6cyc_4V02], (instrs FRECPE_ZZ_H, FRECPX_ZPmZ_H,
-                                          FRSQRTE_ZZ_H, FRECPX_ZPmZ_H_UNDEF)>;
+def : InstRW<[V2Write_6cyc_4V02], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>;
 
 // Floating point reciprocal estimate, F32
-def : InstRW<[V2Write_4cyc_2V02], (instrs FRECPE_ZZ_S, FRECPX_ZPmZ_S,
-                                          FRSQRTE_ZZ_S, FRECPX_ZPmZ_S_UNDEF)>;
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>;
 
 // Floating point reciprocal estimate, F64
-def : InstRW<[V2Write_3cyc_1V02], (instrs FRECPE_ZZ_D, FRECPX_ZPmZ_D,
-                                          FRSQRTE_ZZ_D, FRECPX_ZPmZ_D_UNDEF)>;
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>;
 
 // Floating point reciprocal step
-def : InstRW<[V2Write_4cyc_1V], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
+def : InstRW<[V2Write_4cyc_1V], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;
 
 // Floating point reduction, F16
 def : InstRW<[V2Write_8cyc_4V],
-             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H$")>;
+             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H")>;
 
 // Floating point reduction, F32
 def : InstRW<[V2Write_6cyc_3V],
-             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S$")>;
+             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S")>;
 
 // Floating point reduction, F64
 def : InstRW<[V2Write_4cyc_2V],
-             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D$")>;
+             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D")>;
 
 // Floating point round to integral, F16
-def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ(_UNDEF)?_H$")>;
+def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
 
 // Floating point round to integral, F32
-def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ(_UNDEF)?_S$")>;
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
 
 // Floating point round to integral, F64
-def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ(_UNDEF)?_D$")>;
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
 
 // Floating point square root, F16
-def : InstRW<[V2Write_13cyc_1V0_12rc], (instrs FSQRT_ZPmZ_H, FSQRT_ZPmZ_H_UNDEF)>;
+def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H")>;
 
 // Floating point square root, F32
-def : InstRW<[V2Write_10cyc_1V0_9rc], (instrs FSQRT_ZPmZ_S, FSQRT_ZPmZ_S_UNDEF)>;
+def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S")>;
 
 // Floating point square root, F64
-def : InstRW<[V2Write_16cyc_1V0_14rc], (instrs FSQRT_ZPmZ_D, FSQRT_ZPmZ_D_UNDEF)>;
+def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D")>;
 
 // Floating point trigonometric exponentiation
-def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]$")>;
+def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>;
 
 // Floating point trigonometric multiply add
-def : InstRW<[V2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]$")>;
+def : InstRW<[V2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]")>;
 
 // Floating point trigonometric, miscellaneous
-def : InstRW<[V2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
+def : InstRW<[V2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]")>;
 
 // SVE BFloat16 (BF16) instructions
 // -----------------------------------------------------------------------------
@@ -2587,7 +2578,7 @@ def : InstRW<[V2Wr_ZBFDOT, V2Rd_ZBFDOT], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
 def : InstRW<[V2Wr_ZBFMMA, V2Rd_ZBFMMA], (instrs BFMMLA_ZZZ)>;
 
 // Multiply accumulate long
-def : InstRW<[V2Wr_ZBFMAL, V2Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?$")>;
+def : InstRW<[V2Wr_ZBFMAL, V2Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?")>;
 
 // SVE Load instructions
 // -----------------------------------------------------------------------------

diff --git a/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp b/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp
new file mode 100644
index 00000000000000..2c4f7d04af5bf8
--- /dev/null
+++ b/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp
@@ -0,0 +1,108 @@
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+#include "gtest/gtest.h"
+
+#include <algorithm>
+#include <memory>
+#include <string>
+
+using namespace llvm;
+namespace {
+
+/// Creates an AArch64 target machine configured for \p CPU.
+///
+/// Returns null when the target lookup fails, so that callers can report the
+/// failure with an assertion instead of dereferencing a null Target.
+std::unique_ptr<LLVMTargetMachine> createTargetMachine(const std::string &CPU) {
+  auto TT(Triple::normalize("aarch64--"));
+
+  LLVMInitializeAArch64TargetInfo();
+  LLVMInitializeAArch64Target();
+  LLVMInitializeAArch64TargetMC();
+
+  std::string Error;
+  const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error);
+  if (!TheTarget)
+    return nullptr;
+
+  return std::unique_ptr<LLVMTargetMachine>(static_cast<LLVMTargetMachine *>(
+      TheTarget->createTargetMachine(TT, CPU, "", TargetOptions(), std::nullopt,
+                                     std::nullopt, CodeGenOpt::Default)));
+}
+
+/// Builds an AArch64InstrInfo from a subtarget constructed with the triple,
+/// CPU name and feature string of \p TM.
+// NOTE(review): ST is local to this function; confirm the returned
+// AArch64InstrInfo does not use a reference to it after this returns.
+std::unique_ptr<AArch64InstrInfo> createInstrInfo(TargetMachine *TM) {
+  AArch64Subtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
+                      std::string(TM->getTargetCPU()),
+                      std::string(TM->getTargetFeatureString()), *TM, true);
+  return std::make_unique<AArch64InstrInfo>(ST);
+}
+
+/// Returns the largest cycle count among the write-latency entries of
+/// \p SCDesc, or 0 when the class has no entries.
+int maxWriteLatency(const MCSubtargetInfo &STI,
+                    const MCSchedClassDesc &SCDesc) {
+  int Latency = 0;
+  for (unsigned DefIdx = 0, DefEnd = SCDesc.NumWriteLatencyEntries;
+       DefIdx != DefEnd; ++DefIdx) {
+    const MCWriteLatencyEntry *WLEntry =
+        STI.getWriteLatencyEntry(&SCDesc, DefIdx);
+    Latency = std::max(Latency, static_cast<int>(WLEntry->Cycles));
+  }
+  return Latency;
+}
+
+/// Checks that, in the scheduling model of \p CPU, every SVE pseudo
+/// instruction has the same maximum write latency as the instruction it maps
+/// to.
+void runSVEPseudoTestForCPU(const std::string &CPU) {
+  std::unique_ptr<LLVMTargetMachine> TM = createTargetMachine(CPU);
+  ASSERT_TRUE(TM);
+  std::unique_ptr<AArch64InstrInfo> II = createInstrInfo(TM.get());
+  ASSERT_TRUE(II);
+
+  const MCSubtargetInfo *STI = TM->getMCSubtargetInfo();
+  const MCSchedModel &SchedModel = STI->getSchedModel();
+
+  for (unsigned i = 0; i < AArch64::INSTRUCTION_LIST_END; ++i) {
+    // i is the opcode of a candidate pseudo; OrigInstr is the opcode of the
+    // instruction it maps to, or -1 when i is not in the pseudo table.
+    int OrigInstr = AArch64::getSVEPseudoMap(i);
+    if (OrigInstr == -1)
+      continue;
+
+    const MCSchedClassDesc *SCDesc =
+        SchedModel.getSchedClassDesc(II->get(i).getSchedClass());
+    const MCSchedClassDesc *SCDescOrig =
+        SchedModel.getSchedClassDesc(II->get(OrigInstr).getSchedClass());
+
+    // Validate the pseudo's class before reading its latency entries.
+    ASSERT_TRUE(SCDesc->isValid());
+
+    // Each class is measured over its own entries, so the two maxima are
+    // independent and neither table is indexed with the other's entry count.
+    ASSERT_EQ(maxWriteLatency(*STI, *SCDesc),
+              maxWriteLatency(*STI, *SCDescOrig));
+  }
+}
+
+// TODO : Add more CPUs that support SVE/SVE2
+TEST(AArch64SVESchedPseudoTesta510, IsCorrect) {
+  runSVEPseudoTestForCPU("cortex-a510");
+}
+
+TEST(AArch64SVESchedPseudoTestv2, IsCorrect) {
+  runSVEPseudoTestForCPU("neoverse-v2");
+}
+
+} // namespace

diff --git a/llvm/unittests/Target/AArch64/CMakeLists.txt b/llvm/unittests/Target/AArch64/CMakeLists.txt
index 988d13191f4a0a..dacd919ba1e33b 100644
--- a/llvm/unittests/Target/AArch64/CMakeLists.txt
+++ b/llvm/unittests/Target/AArch64/CMakeLists.txt
@@ -28,6 +28,7 @@ add_llvm_target_unittest(AArch64Tests
   InstSizes.cpp
   MatrixRegisterAliasing.cpp
   SMEAttributesTest.cpp
+  AArch64SVESchedPseudoTest.cpp
   )
 
 set_property(TARGET AArch64Tests PROPERTY FOLDER "Tests/UnitTests/TargetTests")


        


More information about the llvm-commits mailing list