[llvm] 23c658a - [AArch64] Ensure Neoverse V1 scheduling model includes all SVE pseudos. (#84187)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 7 23:09:38 PST 2024
Author: David Green
Date: 2024-03-08T07:09:33Z
New Revision: 23c658ac4183272221ef358575dca0d386096d36
URL: https://github.com/llvm/llvm-project/commit/23c658ac4183272221ef358575dca0d386096d36
DIFF: https://github.com/llvm/llvm-project/commit/23c658ac4183272221ef358575dca0d386096d36.diff
LOG: [AArch64] Ensure Neoverse V1 scheduling model includes all SVE pseudos. (#84187)
With the many pseudos used in SVE codegen it can be too easy to miss
instructions. This enables the existing test we have for checking the
scheduling info of the pseudos matches the real instructions, and
adjusts the scheduling info in the NeoverseV1 model to make sure all are
handled. In the cases I could I opted to use the same info as in the
NeoverseV2 model, to keep the differences smaller.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
index e50a401f8b2aec..c7dfd64b2fb24e 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -1372,18 +1372,18 @@ def : InstRW<[V1Write_3c_2M0], (instregex "^PTRUES_[BHSD]$")>;
// Arithmetic, basic
// Logical
def : InstRW<[V1Write_2c_1V01],
- (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]$",
- "^(ADD|SUB)_Z(I|P[mZ]Z|ZZ)_[BHSD]$",
+ (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]",
+ "^(ADD|SUB)_Z(I|P[mZ]Z|ZZ)_[BHSD]",
"^ADR_[SU]XTW_ZZZ_D_[0123]$",
"^ADR_LSL_ZZZ_[SD]_[0123]$",
- "^[SU]ABD_ZP[mZ]Z_[BHSD]$",
- "^[SU](MAX|MIN)_Z(I|P[mZ]Z)_[BHSD]$",
+ "^[SU]ABD_ZP[mZ]Z_[BHSD]",
+ "^[SU](MAX|MIN)_Z(I|P[mZ]Z)_[BHSD]",
"^[SU]Q(ADD|SUB)_Z(I|ZZ)_[BHSD]$",
- "^SUBR_Z(I|P[mZ]Z)_[BHSD]$",
+ "^SUBR_Z(I|P[mZ]Z)_[BHSD]",
"^(AND|EOR|ORR)_ZI$",
- "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$",
+ "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZP?ZZ",
"^EOR(BT|TB)_ZZZ_[BHSD]$",
- "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>;
+ "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]")>;
// Arithmetic, shift
def : InstRW<[V1Write_2c_1V1],
@@ -1394,10 +1394,10 @@ def : InstRW<[V1Write_2c_1V1],
"^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
// Arithmetic, shift right for divide
-def : InstRW<[V1Write_4c_1V1], (instregex "^ASRD_ZP[mZ]I_[BHSD]$")>;
+def : InstRW<[V1Write_4c_1V1], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;
// Count/reverse bits
-def : InstRW<[V1Write_2c_1V01], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>;
+def : InstRW<[V1Write_2c_1V01], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;
// Broadcast logical bitmask immediate to vector
def : InstRW<[V1Write_2c_1V01], (instrs DUPM_ZI)>;
@@ -1420,10 +1420,10 @@ def : InstRW<[V1Write_3c_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
"^[SU]CVTF_ZPmZ_StoD")>;
// Convert to floating point, 32b to single or half
-def : InstRW<[V1Write_4c_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>;
+def : InstRW<[V1Write_4c_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;
// Convert to floating point, 16b to half
-def : InstRW<[V1Write_6c_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH$")>;
+def : InstRW<[V1Write_6c_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
// Copy, scalar
def : InstRW<[V1Write_5c_1M0_1V01], (instregex "^CPY_ZPmR_[BHSD]$")>;
@@ -1432,10 +1432,12 @@ def : InstRW<[V1Write_5c_1M0_1V01], (instregex "^CPY_ZPmR_[BHSD]$")>;
def : InstRW<[V1Write_2c_1V01], (instregex "^CPY_ZP([mz]I|mV)_[BHSD]$")>;
// Divides, 32 bit
-def : InstRW<[V1Write_12c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$")>;
+def : InstRW<[V1Write_12c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_S",
+ "^[SU]DIV_ZPZZ_S")>;
// Divides, 64 bit
-def : InstRW<[V1Write_20c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$")>;
+def : InstRW<[V1Write_20c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
+ "^[SU]DIV_ZPZZ_D")>;
// Dot product, 8 bit
def : InstRW<[V1Write_3c_1V01], (instregex "^[SU]DOT_ZZZI?_S$")>;
@@ -1454,9 +1456,9 @@ def : InstRW<[V1Write_2c_1V01], (instregex "^DUP_ZI_[BHSD]$",
def : InstRW<[V1Write_3c_1M0], (instregex "^DUP_ZR_[BHSD]$")>;
// Extend, sign or zero
-def : InstRW<[V1Write_2c_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]$",
- "^[SU]XTH_ZPmZ_[SD]$",
- "^[SU]XTW_ZPmZ_[D]$")>;
+def : InstRW<[V1Write_2c_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]",
+ "^[SU]XTH_ZPmZ_[SD]",
+ "^[SU]XTW_ZPmZ_[D]")>;
// Extract
def : InstRW<[V1Write_2c_1V01], (instrs EXT_ZZI)>;
@@ -1489,18 +1491,22 @@ def : InstRW<[V1Write_2c_1V01], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
def : InstRW<[V1Write_3c_1V01], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
// Multiply, B, H, S element size
-def : InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ)_[BHS]$",
- "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>;
+def : InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
+ "^MUL_ZPZZ_[BHS]",
+ "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
+ "^[SU]MULH_ZPZZ_[BHS]")>;
// Multiply, D element size
// Multiply accumulate, D element size
-def : InstRW<[V1Write_5c_2V0], (instregex "^MUL_(ZI|ZPmZ)_D$",
- "^[SU]MULH_ZPmZ_D$",
- "^(MLA|MLS|MAD|MSB)_ZPmZZ_D$")>;
+def : InstRW<[V1Write_5c_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
+ "^MUL_ZPZZ_D",
+ "^[SU]MULH_(ZPmZ|ZZZ)_D",
+ "^[SU]MULH_ZPZZ_D",
+ "^(MLA|MLS|MAD|MSB)_(ZPmZZ|ZPZZZ)_D")>;
// Multiply accumulate, B, H, S element size
// NOTE: This is not specified in the SOG.
-def : InstRW<[V1Write_4c_1V0], (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;
+def : InstRW<[V1Write_4c_1V0], (instregex "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_[BHS]")>;
// Predicate counting vector
def : InstRW<[V1Write_2c_1V0], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>;
@@ -1547,12 +1553,17 @@ def : InstRW<[V1Write_2c_1V01], (instregex "^SEL_ZPZZ_[BHSD]$",
// -----------------------------------------------------------------------------
// Floating point absolute value/
diff erence
+def : InstRW<[V1Write_2c_1V01], (instregex "^FAB[SD]_ZPmZ_[HSD]",
+ "^FABD_ZPZZ_[HSD]",
+ "^FABS_ZPmZ_[HSD]")>;
+
// Floating point arithmetic
-def : InstRW<[V1Write_2c_1V01], (instregex "^FAB[SD]_ZPmZ_[HSD]$",
- "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$",
- "^FADDP_ZPmZZ_[HSD]$",
- "^FNEG_ZPmZ_[HSD]$",
- "^FSUBR_ZPm[IZ]_[HSD]$")>;
+def : InstRW<[V1Write_2c_1V01], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
+ "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
+ "^FADDP_ZPmZZ_[HSD]",
+ "^FNEG_ZPmZ_[HSD]",
+ "^FSUBR_ZPm[IZ]_[HSD]",
+ "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;
// Floating point associative add, F16
def : InstRW<[V1Write_19c_18V0], (instrs FADDA_VPZ_H)>;
@@ -1577,40 +1588,44 @@ def : InstRW<[V1Write_5c_1V01], (instregex "^FCMLA_ZPmZZ_[HSD]$",
// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
// Floating point convert to integer, F32
-def : InstRW<[V1Write_4c_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)$",
- "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>;
+def : InstRW<[V1Write_4c_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
+ "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;
// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16)
// Floating point convert to integer, F64
-def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$",
- "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>;
+def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
+ "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;
// Floating point convert to integer, F16
-def : InstRW<[V1Write_6c_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>;
+def : InstRW<[V1Write_6c_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;
// Floating point copy
def : InstRW<[V1Write_2c_1V01], (instregex "^FCPY_ZPmI_[HSD]$",
"^FDUP_ZI_[HSD]$")>;
// Floating point divide, F16
-def : InstRW<[V1Write_13c10_1V0], (instregex "^FDIVR?_ZPmZ_H$")>;
+def : InstRW<[V1Write_13c10_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
// Floating point divide, F32
-def : InstRW<[V1Write_10c7_1V0], (instregex "^FDIVR?_ZPmZ_S$")>;
+def : InstRW<[V1Write_10c7_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
// Floating point divide, F64
-def : InstRW<[V1Write_15c7_1V0], (instregex "^FDIVR?_ZPmZ_D$")>;
+def : InstRW<[V1Write_15c7_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
// Floating point min/max
-def : InstRW<[V1Write_2c_1V01], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>;
+def : InstRW<[V1Write_2c_1V01], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
+ "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;
// Floating point multiply
-def : InstRW<[V1Write_3c_1V01], (instregex "^F(SCALE|MULX)_ZPmZ_[HSD]$",
- "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>;
+def : InstRW<[V1Write_3c_1V01], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
+ "^FMULX_ZPZZ_[HSD]",
+ "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
+ "^FMUL_ZPZ[IZ]_[HSD]")>;
// Floating point multiply accumulate
// Floating point reciprocal step
def : InstRW<[V1Write_4c_1V01], (instregex "^F(N?M(AD|SB)|N?ML[AS])_ZPmZZ_[HSD]$",
+ "^FN?ML[AS]_ZPZZZ_[HSD]",
"^FML[AS]_ZZZI_[HSD]$",
"^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
@@ -1624,7 +1639,7 @@ def : InstRW<[V1Write_4c_2V0], (instrs FRECPE_ZZ_S, FRSQRTE_ZZ_S)>;
def : InstRW<[V1Write_3c_1V0], (instrs FRECPE_ZZ_D, FRSQRTE_ZZ_D)>;
// Floating point reciprocal exponent
-def : InstRW<[V1Write_3c_1V0], (instregex "^FRECPX_ZPmZ_[HSD]$")>;
+def : InstRW<[V1Write_3c_1V0], (instregex "^FRECPX_ZPmZ_[HSD]")>;
// Floating point reduction, F16
def : InstRW<[V1Write_13c_6V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_H$")>;
@@ -1636,22 +1651,22 @@ def : InstRW<[V1Write_11c_1V_5V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_S$
def : InstRW<[V1Write_9c_1V_4V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_D$")>;
// Floating point round to integral, F16
-def : InstRW<[V1Write_6c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>;
+def : InstRW<[V1Write_6c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
// Floating point round to integral, F32
-def : InstRW<[V1Write_4c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>;
+def : InstRW<[V1Write_4c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
// Floating point round to integral, F64
-def : InstRW<[V1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>;
+def : InstRW<[V1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
// Floating point square root, F16
-def : InstRW<[V1Write_13c10_1V0], (instrs FSQRT_ZPmZ_H)>;
+def : InstRW<[V1Write_13c10_1V0], (instregex "^FSQRT_ZPmZ_H")>;
// Floating point square root, F32
-def : InstRW<[V1Write_10c7_1V0], (instrs FSQRT_ZPmZ_S)>;
+def : InstRW<[V1Write_10c7_1V0], (instregex "^FSQRT_ZPmZ_S")>;
// Floating point square root, F64
-def : InstRW<[V1Write_16c7_1V0], (instrs FSQRT_ZPmZ_D)>;
+def : InstRW<[V1Write_16c7_1V0], (instregex "^FSQRT_ZPmZ_D")>;
// Floating point trigonometric
def : InstRW<[V1Write_3c_1V01], (instregex "^FEXPA_ZZ_[HSD]$",
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index 807ce40bc5eac1..f10b94523d2e03 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -2567,13 +2567,13 @@ def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
// Floating point square root, F16
-def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H", "^FSQRT_ZPmZ_H")>;
+def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H")>;
// Floating point square root, F32
-def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S", "^FSQRT_ZPmZ_S")>;
+def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S")>;
// Floating point square root, F64
-def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D", "^FSQRT_ZPmZ_D")>;
+def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D")>;
// Floating point trigonometric exponentiation
def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>;
diff --git a/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp b/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp
index 9d8633353e1f9f..6098d4e6239251 100644
--- a/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp
+++ b/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp
@@ -107,6 +107,10 @@ TEST(AArch64SVESchedPseudoTesta510, IsCorrect) {
runSVEPseudoTestForCPU("cortex-a510");
}
+TEST(AArch64SVESchedPseudoTestv1, IsCorrect) {
+ runSVEPseudoTestForCPU("neoverse-v1");
+}
+
TEST(AArch64SVESchedPseudoTestv2, IsCorrect) {
runSVEPseudoTestForCPU("neoverse-v2");
}
More information about the llvm-commits
mailing list