[llvm] [LLVM][AArch64][CodeGen] Mark FFR as a reserved register. (PR #83437)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 29 07:36:29 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Paul Walker (paulwalker-arm)
Changes
This allows the removal of the FFR-related pseudo nodes that existed only to work around machine verifier failures.
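For readers unfamiliar with the mechanism: reserving a register is a small addition to the target's reserved-register computation, as the first hunk below shows. A minimal sketch of the general pattern follows, using a hypothetical target and register name rather than the patch's actual code:

```cpp
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFunction.h"

using namespace llvm;

// Minimal sketch of the reserved-register pattern. "MyTarget" and its GSR
// register are hypothetical; the real patch reserves AArch64::FFR when SVE
// is available. Reserved registers are never assigned by the register
// allocator, and the machine verifier imposes no liveness obligations on
// them, which is what makes the pseudo wrappers unnecessary.
BitVector
MyTargetRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  if (MF.getSubtarget<MyTargetSubtarget>().hasGlobalState())
    Reserved.set(MyTarget::GSR); // model GSR as unallocatable global state
  return Reserved;
}
```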
---
Patch is 113.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83437.diff
28 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp (+4)
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+5-5)
- (modified) llvm/lib/Target/AArch64/AArch64SchedA510.td (+26-26)
- (modified) llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td (+26-26)
- (modified) llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td (+21-21)
- (modified) llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td (+26-26)
- (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+68-175)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-mask-opt.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll (+60-60)
- (modified) llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll (+14-14)
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-scaled-offsets.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-unscaled-offsets.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-scaled-offset.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-unscaled-offset.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-imm-offset.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-scalar-offset.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-ldnf1.mir (+124-124)
- (modified) llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-ptest-removal-rdffr.mir (+4-4)
- (modified) llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s (+4-4)
- (modified) llvm/test/tools/llvm-mca/AArch64/Cortex/A510-sve-instructions.s (+4-4)
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s (+4-4)
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s (+4-4)
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-sve-instructions.s (+4-4)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index b919c116445c8b..10958f0bf97b4e 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -437,6 +437,10 @@ AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const {
if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
markSuperRegs(Reserved, AArch64::W16);
+ // FFR is modelled as global state that cannot be allocated.
+ if (MF.getSubtarget<AArch64Subtarget>().hasSVE())
+ Reserved.set(AArch64::FFR);
+
// SME tiles are not allocatable.
if (MF.getSubtarget<AArch64Subtarget>().hasSME()) {
for (MCPhysReg SubReg : subregs_inclusive(AArch64::ZA))
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 2b0524754b2e9d..7c98f934a1317e 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -488,11 +488,11 @@ def AArch64fmin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmin, AAr
def AArch64fmax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmax, AArch64fmax_p>;
let Predicates = [HasSVE] in {
- defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
- def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
- defm RDFFR_P : sve_int_rdffr_unpred<"rdffr", int_aarch64_sve_rdffr>;
- def SETFFR : sve_int_setffr<"setffr", int_aarch64_sve_setffr>;
- def WRFFR : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>;
+ def RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
+ def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
+ def RDFFR_P : sve_int_rdffr_unpred<"rdffr", int_aarch64_sve_rdffr>;
+ def SETFFR : sve_int_setffr<"setffr", int_aarch64_sve_setffr>;
+ def WRFFR : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>;
} // End HasSVE
let Predicates = [HasSVEorSME] in {
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA510.td b/llvm/lib/Target/AArch64/AArch64SchedA510.td
index 5e36b6f4d34a28..68343674bc819e 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA510.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA510.td
@@ -1198,24 +1198,24 @@ def : InstRW<[CortexA510Write<3, CortexA510UnitLdSt>], (instregex "^LDNT1[BHWD]_
def : InstRW<[CortexA510Write<3, CortexA510UnitLdSt>], (instregex "^LDNT1[BHWD]_ZRR$")>;
// Non temporal gather load, vector + scalar 32-bit element size
-def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLdSt>], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
- "^LDNT1S[BH]_ZZR_S_REAL$")>;
+def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLdSt>], (instregex "^LDNT1[BHW]_ZZR_S$",
+ "^LDNT1S[BH]_ZZR_S$")>;
// Non temporal gather load, vector + scalar 64-bit element size
-def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
-def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instrs LDNT1D_ZZR_D_REAL)>;
+def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instregex "^LDNT1S?[BHW]_ZZR_D$")>;
+def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instrs LDNT1D_ZZR_D)>;
// Contiguous first faulting load, scalar + scalar
-def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LDFF1[BHWD]_REAL$",
- "^LDFF1S?B_[HSD]_REAL$",
- "^LDFF1S?H_[SD]_REAL$",
- "^LDFF1S?W_D_REAL$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LDFF1[BHWD]$",
+ "^LDFF1S?B_[HSD]$",
+ "^LDFF1S?H_[SD]$",
+ "^LDFF1S?W_D$")>;
// Contiguous non faulting load, scalar + imm
-def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LDNF1[BHWD]_IMM_REAL$",
- "^LDNF1S?B_[HSD]_IMM_REAL$",
- "^LDNF1S?H_[SD]_IMM_REAL$",
- "^LDNF1S?W_D_IMM_REAL$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LDNF1[BHWD]_IMM$",
+ "^LDNF1S?B_[HSD]_IMM$",
+ "^LDNF1S?H_[SD]_IMM$",
+ "^LDNF1S?W_D_IMM$")>;
// Contiguous Load two structures to two vectors, scalar + imm
def : InstRW<[CortexA510MCWrite<3, 1, CortexA510UnitLdSt>], (instregex "^LD2[BHWD]_IMM$")>;
@@ -1236,28 +1236,28 @@ def : InstRW<[CortexA510MCWrite<5, 3, CortexA510UnitLdSt>], (instregex "^LD4[BHW
def : InstRW<[CortexA510MCWrite<5, 3, CortexA510UnitLdSt>], (instregex "^LD4[BHWD]$")>;
// Gather load, vector + imm, 32-bit element size
-def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLdSt>], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
- "^GLD(FF)?1W_IMM_REAL$")>;
+def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLdSt>], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
+ "^GLD(FF)?1W_IMM$")>;
// Gather load, vector + imm, 64-bit element size
-def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
- "^GLD(FF)?1D_IMM_REAL$")>;
+def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
+ "^GLD(FF)?1D_IMM$")>;
// Gather load, 64-bit element size
def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>],
- (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW_(SCALED_)?REAL$",
- "^GLD(FF)?1S?[BHW]_D_(SCALED_)?REAL$",
- "^GLD(FF)?1D_[SU]XTW_(SCALED_)?REAL$",
- "^GLD(FF)?1D_(SCALED_)?REAL$")>;
+ (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW(_SCALED)?$",
+ "^GLD(FF)?1S?[BHW]_D(_SCALED)?$",
+ "^GLD(FF)?1D_[SU]XTW(_SCALED)?$",
+ "^GLD(FF)?1D(_SCALED)?$")>;
// Gather load, 32-bit scaled offset
def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLd>],
- (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
- "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
+ (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$",
+ "^GLD(FF)?1W_[SU]XTW_SCALED")>;
// Gather load, 32-bit unpacked unscaled offset
-def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLd>], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
- "^GLD(FF)?1W_[SU]XTW_REAL$")>;
+def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLd>], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
+ "^GLD(FF)?1W_[SU]XTW$")>;
def : InstRW<[CortexA510Write<0, CortexA510UnitVALU>], (instregex "^PRF(B|H|W|D).*")>;
// SVE Store instructions
@@ -1357,10 +1357,10 @@ def : InstRW<[CortexA510VSt<8>], (instregex "^SST1[BHW]_D$",
// -----------------------------------------------------------------------------
// Read first fault register, unpredicated
-def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instrs RDFFR_P_REAL)>;
+def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instrs RDFFR_P)>;
// Read first fault register, predicated
-def : InstRW<[CortexA510Write<3, CortexA510UnitALU0>], (instrs RDFFR_PPz_REAL)>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitALU0>], (instrs RDFFR_PPz)>;
// Read first fault register and set flags
def : InstRW<[CortexA510Write<3, CortexA510UnitALU0>], (instrs RDFFRS_PPz)>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
index 8ec124954362f8..c18045e7c8f96f 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
@@ -2110,24 +2110,24 @@ def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZRI$")>;
def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDNT1[BHWD]_ZRR$")>;
// Non temporal gather load, vector + scalar 32-bit element size
-def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
- "^LDNT1S[BH]_ZZR_S_REAL$")>;
+def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LDNT1[BHW]_ZZR_S$",
+ "^LDNT1S[BH]_ZZR_S$")>;
// Non temporal gather load, vector + scalar 64-bit element size
-def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
-def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>;
+def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D$")>;
+def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D)>;
// Contiguous first faulting load, scalar + scalar
-def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$",
- "^LDFF1S?B_[HSD]_REAL$",
- "^LDFF1S?H_[SD]_REAL$",
- "^LDFF1S?W_D_REAL$")>;
+def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]$",
+ "^LDFF1S?B_[HSD]$",
+ "^LDFF1S?H_[SD]$",
+ "^LDFF1S?W_D$")>;
// Contiguous non faulting load, scalar + imm
-def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$",
- "^LDNF1S?B_[HSD]_IMM_REAL$",
- "^LDNF1S?H_[SD]_IMM_REAL$",
- "^LDNF1S?W_D_IMM_REAL$")>;
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM$",
+ "^LDNF1S?B_[HSD]_IMM$",
+ "^LDNF1S?H_[SD]_IMM$",
+ "^LDNF1S?W_D_IMM$")>;
// Contiguous Load two structures to two vectors, scalar + imm
def : InstRW<[N2Write_8cyc_1L_1V], (instregex "^LD2[BHWD]_IMM$")>;
@@ -2148,28 +2148,28 @@ def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^LD4[BHWD]_IMM$")>;
def : InstRW<[N2Write_10cyc_2L_2V_2S], (instregex "^LD4[BHWD]$")>;
// Gather load, vector + imm, 32-bit element size
-def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
- "^GLD(FF)?1W_IMM_REAL$")>;
+def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
+ "^GLD(FF)?1W_IMM$")>;
// Gather load, vector + imm, 64-bit element size
-def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
- "^GLD(FF)?1D_IMM_REAL$")>;
+def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
+ "^GLD(FF)?1D_IMM$")>;
// Gather load, 64-bit element size
def : InstRW<[N2Write_9cyc_2L_2V],
- (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW_(SCALED_)?REAL$",
- "^GLD(FF)?1S?[BHW]_D_(SCALED_)?REAL$",
- "^GLD(FF)?1D_[SU]XTW_(SCALED_)?REAL$",
- "^GLD(FF)?1D_(SCALED_)?REAL$")>;
+ (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW(_SCALED)?$",
+ "^GLD(FF)?1S?[BHW]_D(_SCALED)?$",
+ "^GLD(FF)?1D_[SU]XTW(_SCALED)?$",
+ "^GLD(FF)?1D(_SCALED)?$")>;
// Gather load, 32-bit scaled offset
def : InstRW<[N2Write_10cyc_2L_2V],
- (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
- "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
+ (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$",
+ "^GLD(FF)?1W_[SU]XTW_SCALED")>;
// Gather load, 32-bit unpacked unscaled offset
-def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
- "^GLD(FF)?1W_[SU]XTW_REAL$")>;
+def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
+ "^GLD(FF)?1W_[SU]XTW$")>;
// SVE Store instructions
// -----------------------------------------------------------------------------
@@ -2268,10 +2268,10 @@ def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D$",
// -----------------------------------------------------------------------------
// Read first fault register, unpredicated
-def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>;
+def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P)>;
// Read first fault register, predicated
-def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>;
+def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz)>;
// Read first fault register and set flags
def : InstRW<[N2Write_4cyc_2M0_2M], (instrs RDFFRS_PPz)>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
index 613db353cb0aaa..e50a401f8b2aec 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -1714,17 +1714,17 @@ def : InstRW<[V1Write_7c_1L01_1S], (instrs LDNT1H_ZRR)>;
def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDNT1[BWD]_ZRR$")>;
// Contiguous first faulting load, scalar + scalar
-def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LDFF1H_REAL$",
- "^LDFF1S?H_[SD]_REAL$")>;
-def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDFF1[BWD]_REAL$",
- "^LDFF1S?B_[HSD]_REAL$",
- "^LDFF1S?W_D_REAL$")>;
+def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LDFF1H$",
+ "^LDFF1S?H_[SD]$")>;
+def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDFF1[BWD]$",
+ "^LDFF1S?B_[HSD]$",
+ "^LDFF1S?W_D$")>;
// Contiguous non faulting load, scalar + imm
-def : InstRW<[V1Write_6c_1L01], (instregex "^LDNF1[BHWD]_IMM_REAL$",
- "^LDNF1S?B_[HSD]_IMM_REAL$",
- "^LDNF1S?H_[SD]_IMM_REAL$",
- "^LDNF1S?W_D_IMM_REAL$")>;
+def : InstRW<[V1Write_6c_1L01], (instregex "^LDNF1[BHWD]_IMM$",
+ "^LDNF1S?B_[HSD]_IMM$",
+ "^LDNF1S?H_[SD]_IMM$",
+ "^LDNF1S?W_D_IMM$")>;
// Contiguous Load two structures to two vectors, scalar + imm
def : InstRW<[V1Write_8c_2L01_2V01], (instregex "^LD2[BHWD]_IMM$")>;
@@ -1746,25 +1746,25 @@ def : InstRW<[V1Write_12c_4L01_4V01], (instregex "^LD4[BHWD]_IMM$")>;
def : InstRW<[V1Write_13c_4L01_2S_4V01], (instregex "^LD4[BHWD]$")>;
// Gather load, vector + imm, 32-bit element size
-def : InstRW<[V1Write_11c_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
- "^GLD(FF)?1W_IMM_REAL$")>;
+def : InstRW<[V1Write_11c_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
+ "^GLD(FF)?1W_IMM$")>;
// Gather load, vector + imm, 64-bit element size
def : InstRW<[V1Write_9c_2L_2V],
- (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
- "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?(SCALED_)?REAL$",
- "^GLD(FF)?1D_IMM_REAL$",
- "^GLD(FF)?1D_([SU]XTW_)?(SCALED_)?REAL$")>;
+ (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
+ "^GLD(FF)?1S?[BHW]_D(_[SU]XTW)?(_SCALED)?$",
+ "^GLD(FF)?1D_IMM$",
+ "^GLD(FF)?1D(_[SU]XTW)?(_SCALED)?$")>;
// Gather load, 32-bit scaled offset
def : InstRW<[V1Write_11c_2L_2V],
- (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
- "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
+ (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$",
+ "^GLD(FF)?1W_[SU]XTW_SCALED")>;
// Gather load, 32-bit unpacked unscaled offset
def : InstRW<[V1Write_9c_1L_1V],
- (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
- "^GLD(FF)?1W_[SU]XTW_REAL$")>;
+ (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
+ "^GLD(FF)?1W_[SU]XTW$")>;
// Prefetch
// NOTE: This is not specified in the SOG.
@@ -1848,12 +1848,12 @@ def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_IMM$",
// Read first fault register, unpredicated
// Set first fault register
// Write to first fault register
-def : InstRW<[V1Write_2c_1M0], (instrs RDFFR_P_REAL,
+def : InstRW<[V1Write_2c_1M0], (instrs RDFFR_P,
SETFFR,
WRFFR)>;
// Read first fault register, predicated
-def : InstRW<[V1Write_3c_2M0], (instrs RDFFR_PPz_REAL)>;
+def : InstRW<[V1Write_3c_2M0], (instrs RDFFR_PPz)>;
// Read first fault register and set flags
def : InstRW<[V1Write_4c_1M], (instrs RDFFRS_PPz)>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index e7de40fdf1deb0..807ce40bc5eac1 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -2635,24 +2635,24 @@ def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;
def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZR[IR]$")>;
// Non temporal gather load, vector + scalar 32-bit element size
-def : InstRW<[V2Write_9cyc_2L_4V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
- "^LDNT1S[BH]_ZZR_S_REAL$")>;
+def : InstRW<[V2Write_9cyc_2L_4V], (instregex "^LDNT1[BHW]_ZZR_S$",
+ "^LDNT1S[BH]_ZZR_S$")>;
// Non temporal gather load, vector + scalar 64-bit element size
-def : InstRW<[V2Write_9cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
-def : InstRW<[V2Write_9cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>;
+def : InstRW<[V2Write_9cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D$")>;
+def : InstRW<[V2Write_9cyc_2L_2V1], (instrs LDNT1D_ZZR_D)>;
// Contiguous first faulting load, scalar + scalar
-def : InstRW<[V2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$",
- "^LDFF1S?B_[HSD]_REAL$",
- "^LDFF1S?H_[SD]_REAL$",
- "^LDFF1S?W_D_REAL$")>;
+def : InstRW<[V2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]$",
+ "^LDFF1S?B_[HSD]$",
+ "^LDFF1S?H_[SD]$",
+ "^LDFF1S?W_D$")>;
// Contiguous non faulting load, scalar + imm
-def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$",
- "^LDNF1S?B_[HSD]_IMM_REAL$",
- "^LDNF1S?H_[SD]_IMM_REAL$",
- "^LDNF1S?W_D_IMM_REAL$")>;
+def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM$",
+ "^LDNF1S?B_[HSD]_IMM$",
+ "^LDNF1S?H_[SD]_IMM$",
+ "^LDNF1S?W_D_IMM$")>;
// Contiguous Load two structures to two vectors, scalar + imm
def : InstRW<[V2Write_8cyc_2L_2V], (instregex "^LD2[BHWD]_IMM$")>;
@@ -2673,33 +2673,33 @@ def : InstRW<[V2Write_9cyc_4L_8V], (instregex "^LD4[BHWD]_IMM$")>;
def : InstRW<[V2Write_10cyc_4L_8V_4S], (instregex "^LD4[BHWD]$")>;
// Gather load, vector + imm, 32-bit element size
-def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
- "^GLD(FF)?1W_IMM_REAL$")>;
+def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
+ ...
[truncated]
``````````
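For completeness, the verifier-side behaviour this relies on: reserved physical registers are treated as always-valid global state, so explicit defs and uses of FFR no longer trip liveness checks. An illustrative sketch, assuming LLVM's real `MachineRegisterInfo::isReserved` query (the wrapper function here is hypothetical, not the verifier's actual code):

```cpp
#include "llvm/CodeGen/MachineRegisterInfo.h"

// Illustrative only: the machine verifier's real logic is more involved, but
// the key property is that reserved physical registers carry no liveness
// obligations, so a direct def or use of FFR is accepted as-is.
static bool needsLivenessCheck(const llvm::MachineRegisterInfo &MRI,
                               llvm::MCRegister PhysReg) {
  return !MRI.isReserved(PhysReg); // reserved => exempt from liveness checks
}
```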
https://github.com/llvm/llvm-project/pull/83437