[llvm] [LLVM][AArch64][CodeGen] Mark FFR as a reserved register. (PR #83437)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 29 07:35:58 PST 2024
https://github.com/paulwalker-arm created https://github.com/llvm/llvm-project/pull/83437
This allows the removal of FFR-related pseudo nodes that only existed to work around machine verifier failures.
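For context, the FFR state those pseudos modelled is visible at the IR level through the SVE first-faulting intrinsics exercised by the tests this patch updates. A minimal sketch of that flow (function name hypothetical; intrinsic signatures as assumed from the ldff/rdffr tests touched below):

define <vscale x 16 x i1> @ffr_flow(<vscale x 16 x i1> %pg, ptr %base) {
  ; Initialise FFR so every lane starts out valid.
  call void @llvm.aarch64.sve.setffr()
  ; First-faulting load: lanes at or beyond a faulting element clear
  ; their FFR bit instead of trapping.
  %data = call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ; Read back the surviving lanes, zeroing inactive ones.
  %ffr = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> %pg)
  ret <vscale x 16 x i1> %ffr
}

declare void @llvm.aarch64.sve.setffr()
declare <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1>, ptr)
declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1>)

With FFR reserved, instruction selection can emit the real SETFFR/LDFF1/RDFFR instructions for this sequence directly, instead of selecting pseudos and expanding them to the "_REAL" variants late.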
From 55569e77a8a6ee0996b0f9bef2575126881e6f18 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Thu, 29 Feb 2024 14:52:25 +0000
Subject: [PATCH] [LLVM][AArch64][CodeGen] Mark FFR as a reserved register.
This allows the removal of FFR-related pseudo nodes that only
existed to work around machine verifier failures.
---
.../Target/AArch64/AArch64RegisterInfo.cpp | 4 +
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 10 +-
llvm/lib/Target/AArch64/AArch64SchedA510.td | 52 ++--
.../Target/AArch64/AArch64SchedNeoverseN2.td | 52 ++--
.../Target/AArch64/AArch64SchedNeoverseV1.td | 42 +--
.../Target/AArch64/AArch64SchedNeoverseV2.td | 52 ++--
llvm/lib/Target/AArch64/SVEInstrFormats.td | 243 +++++------------
.../AArch64/sve-fixed-length-mask-opt.ll | 4 +-
.../AArch64/sve-fixed-length-masked-gather.ll | 120 ++++-----
.../CodeGen/AArch64/sve-fp-int-min-max.ll | 2 +-
.../AArch64/sve-gather-scatter-dag-combine.ll | 28 +-
...cs-ff-gather-loads-32bit-scaled-offsets.ll | 2 +-
...-ff-gather-loads-32bit-unscaled-offsets.ll | 2 +-
...ics-ff-gather-loads-64bit-scaled-offset.ll | 2 +-
...s-ff-gather-loads-64bit-unscaled-offset.ll | 2 +-
...-ff-gather-loads-vector-base-imm-offset.ll | 2 +-
...-gather-loads-vector-base-scalar-offset.ll | 2 +-
.../sve-intrinsics-ffr-manipulation.ll | 2 +-
.../AArch64/sve-intrinsics-loads-ff.ll | 2 +-
.../AArch64/sve-intrinsics-loads-nf.ll | 2 +-
llvm/test/CodeGen/AArch64/sve-ldnf1.mir | 248 +++++++++---------
.../AArch64/sve-masked-gather-legalize.ll | 2 +-
.../AArch64/sve-ptest-removal-rdffr.mir | 8 +-
.../AArch64/A64FX/A64FX-sve-instructions.s | 8 +-
.../AArch64/Cortex/A510-sve-instructions.s | 8 +-
.../AArch64/Neoverse/N2-sve-instructions.s | 8 +-
.../AArch64/Neoverse/V1-sve-instructions.s | 8 +-
.../AArch64/Neoverse/V2-sve-instructions.s | 8 +-
28 files changed, 411 insertions(+), 514 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index b919c116445c8b..10958f0bf97b4e 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -437,6 +437,10 @@ AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const {
if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
markSuperRegs(Reserved, AArch64::W16);
+ // FFR is modelled as global state that cannot be allocated.
+ if (MF.getSubtarget<AArch64Subtarget>().hasSVE())
+ Reserved.set(AArch64::FFR);
+
// SME tiles are not allocatable.
if (MF.getSubtarget<AArch64Subtarget>().hasSME()) {
for (MCPhysReg SubReg : subregs_inclusive(AArch64::ZA))
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 2b0524754b2e9d..7c98f934a1317e 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -488,11 +488,11 @@ def AArch64fmin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmin, AAr
def AArch64fmax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmax, AArch64fmax_p>;
let Predicates = [HasSVE] in {
- defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
- def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
- defm RDFFR_P : sve_int_rdffr_unpred<"rdffr", int_aarch64_sve_rdffr>;
- def SETFFR : sve_int_setffr<"setffr", int_aarch64_sve_setffr>;
- def WRFFR : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>;
+ def RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
+ def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
+ def RDFFR_P : sve_int_rdffr_unpred<"rdffr", int_aarch64_sve_rdffr>;
+ def SETFFR : sve_int_setffr<"setffr", int_aarch64_sve_setffr>;
+ def WRFFR : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>;
} // End HasSVE
let Predicates = [HasSVEorSME] in {
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA510.td b/llvm/lib/Target/AArch64/AArch64SchedA510.td
index 5e36b6f4d34a28..68343674bc819e 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA510.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA510.td
@@ -1198,24 +1198,24 @@ def : InstRW<[CortexA510Write<3, CortexA510UnitLdSt>], (instregex "^LDNT1[BHWD]_
def : InstRW<[CortexA510Write<3, CortexA510UnitLdSt>], (instregex "^LDNT1[BHWD]_ZRR$")>;
// Non temporal gather load, vector + scalar 32-bit element size
-def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLdSt>], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
- "^LDNT1S[BH]_ZZR_S_REAL$")>;
+def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLdSt>], (instregex "^LDNT1[BHW]_ZZR_S$",
+ "^LDNT1S[BH]_ZZR_S$")>;
// Non temporal gather load, vector + scalar 64-bit element size
-def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
-def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instrs LDNT1D_ZZR_D_REAL)>;
+def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instregex "^LDNT1S?[BHW]_ZZR_D$")>;
+def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instrs LDNT1D_ZZR_D)>;
// Contiguous first faulting load, scalar + scalar
-def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LDFF1[BHWD]_REAL$",
- "^LDFF1S?B_[HSD]_REAL$",
- "^LDFF1S?H_[SD]_REAL$",
- "^LDFF1S?W_D_REAL$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LDFF1[BHWD]$",
+ "^LDFF1S?B_[HSD]$",
+ "^LDFF1S?H_[SD]$",
+ "^LDFF1S?W_D$")>;
// Contiguous non faulting load, scalar + imm
-def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LDNF1[BHWD]_IMM_REAL$",
- "^LDNF1S?B_[HSD]_IMM_REAL$",
- "^LDNF1S?H_[SD]_IMM_REAL$",
- "^LDNF1S?W_D_IMM_REAL$")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LDNF1[BHWD]_IMM$",
+ "^LDNF1S?B_[HSD]_IMM$",
+ "^LDNF1S?H_[SD]_IMM$",
+ "^LDNF1S?W_D_IMM$")>;
// Contiguous Load two structures to two vectors, scalar + imm
def : InstRW<[CortexA510MCWrite<3, 1, CortexA510UnitLdSt>], (instregex "^LD2[BHWD]_IMM$")>;
@@ -1236,28 +1236,28 @@ def : InstRW<[CortexA510MCWrite<5, 3, CortexA510UnitLdSt>], (instregex "^LD4[BHW
def : InstRW<[CortexA510MCWrite<5, 3, CortexA510UnitLdSt>], (instregex "^LD4[BHWD]$")>;
// Gather load, vector + imm, 32-bit element size
-def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLdSt>], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
- "^GLD(FF)?1W_IMM_REAL$")>;
+def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLdSt>], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
+ "^GLD(FF)?1W_IMM$")>;
// Gather load, vector + imm, 64-bit element size
-def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
- "^GLD(FF)?1D_IMM_REAL$")>;
+def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
+ "^GLD(FF)?1D_IMM$")>;
// Gather load, 64-bit element size
def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>],
- (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW_(SCALED_)?REAL$",
- "^GLD(FF)?1S?[BHW]_D_(SCALED_)?REAL$",
- "^GLD(FF)?1D_[SU]XTW_(SCALED_)?REAL$",
- "^GLD(FF)?1D_(SCALED_)?REAL$")>;
+ (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW(_SCALED)?$",
+ "^GLD(FF)?1S?[BHW]_D(_SCALED)?$",
+ "^GLD(FF)?1D_[SU]XTW(_SCALED)?$",
+ "^GLD(FF)?1D(_SCALED)?$")>;
// Gather load, 32-bit scaled offset
def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLd>],
- (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
- "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
+ (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$",
+ "^GLD(FF)?1W_[SU]XTW_SCALED")>;
// Gather load, 32-bit unpacked unscaled offset
-def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLd>], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
- "^GLD(FF)?1W_[SU]XTW_REAL$")>;
+def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLd>], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
+ "^GLD(FF)?1W_[SU]XTW$")>;
def : InstRW<[CortexA510Write<0, CortexA510UnitVALU>], (instregex "^PRF(B|H|W|D).*")>;
// SVE Store instructions
@@ -1357,10 +1357,10 @@ def : InstRW<[CortexA510VSt<8>], (instregex "^SST1[BHW]_D$",
// -----------------------------------------------------------------------------
// Read first fault register, unpredicated
-def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instrs RDFFR_P_REAL)>;
+def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instrs RDFFR_P)>;
// Read first fault register, predicated
-def : InstRW<[CortexA510Write<3, CortexA510UnitALU0>], (instrs RDFFR_PPz_REAL)>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitALU0>], (instrs RDFFR_PPz)>;
// Read first fault register and set flags
def : InstRW<[CortexA510Write<3, CortexA510UnitALU0>], (instrs RDFFRS_PPz)>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
index 8ec124954362f8..c18045e7c8f96f 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
@@ -2110,24 +2110,24 @@ def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZRI$")>;
def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDNT1[BHWD]_ZRR$")>;
// Non temporal gather load, vector + scalar 32-bit element size
-def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
- "^LDNT1S[BH]_ZZR_S_REAL$")>;
+def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LDNT1[BHW]_ZZR_S$",
+ "^LDNT1S[BH]_ZZR_S$")>;
// Non temporal gather load, vector + scalar 64-bit element size
-def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
-def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>;
+def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D$")>;
+def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D)>;
// Contiguous first faulting load, scalar + scalar
-def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$",
- "^LDFF1S?B_[HSD]_REAL$",
- "^LDFF1S?H_[SD]_REAL$",
- "^LDFF1S?W_D_REAL$")>;
+def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]$",
+ "^LDFF1S?B_[HSD]$",
+ "^LDFF1S?H_[SD]$",
+ "^LDFF1S?W_D$")>;
// Contiguous non faulting load, scalar + imm
-def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$",
- "^LDNF1S?B_[HSD]_IMM_REAL$",
- "^LDNF1S?H_[SD]_IMM_REAL$",
- "^LDNF1S?W_D_IMM_REAL$")>;
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM$",
+ "^LDNF1S?B_[HSD]_IMM$",
+ "^LDNF1S?H_[SD]_IMM$",
+ "^LDNF1S?W_D_IMM$")>;
// Contiguous Load two structures to two vectors, scalar + imm
def : InstRW<[N2Write_8cyc_1L_1V], (instregex "^LD2[BHWD]_IMM$")>;
@@ -2148,28 +2148,28 @@ def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^LD4[BHWD]_IMM$")>;
def : InstRW<[N2Write_10cyc_2L_2V_2S], (instregex "^LD4[BHWD]$")>;
// Gather load, vector + imm, 32-bit element size
-def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
- "^GLD(FF)?1W_IMM_REAL$")>;
+def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
+ "^GLD(FF)?1W_IMM$")>;
// Gather load, vector + imm, 64-bit element size
-def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
- "^GLD(FF)?1D_IMM_REAL$")>;
+def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
+ "^GLD(FF)?1D_IMM$")>;
// Gather load, 64-bit element size
def : InstRW<[N2Write_9cyc_2L_2V],
- (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW_(SCALED_)?REAL$",
- "^GLD(FF)?1S?[BHW]_D_(SCALED_)?REAL$",
- "^GLD(FF)?1D_[SU]XTW_(SCALED_)?REAL$",
- "^GLD(FF)?1D_(SCALED_)?REAL$")>;
+ (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW(_SCALED)?$",
+ "^GLD(FF)?1S?[BHW]_D(_SCALED)?$",
+ "^GLD(FF)?1D_[SU]XTW(_SCALED)?$",
+ "^GLD(FF)?1D(_SCALED)?$")>;
// Gather load, 32-bit scaled offset
def : InstRW<[N2Write_10cyc_2L_2V],
- (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
- "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
+ (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$",
+ "^GLD(FF)?1W_[SU]XTW_SCALED")>;
// Gather load, 32-bit unpacked unscaled offset
-def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
- "^GLD(FF)?1W_[SU]XTW_REAL$")>;
+def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
+ "^GLD(FF)?1W_[SU]XTW$")>;
// SVE Store instructions
// -----------------------------------------------------------------------------
@@ -2268,10 +2268,10 @@ def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D$",
// -----------------------------------------------------------------------------
// Read first fault register, unpredicated
-def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>;
+def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P)>;
// Read first fault register, predicated
-def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>;
+def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz)>;
// Read first fault register and set flags
def : InstRW<[N2Write_4cyc_2M0_2M], (instrs RDFFRS_PPz)>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
index 613db353cb0aaa..e50a401f8b2aec 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -1714,17 +1714,17 @@ def : InstRW<[V1Write_7c_1L01_1S], (instrs LDNT1H_ZRR)>;
def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDNT1[BWD]_ZRR$")>;
// Contiguous first faulting load, scalar + scalar
-def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LDFF1H_REAL$",
- "^LDFF1S?H_[SD]_REAL$")>;
-def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDFF1[BWD]_REAL$",
- "^LDFF1S?B_[HSD]_REAL$",
- "^LDFF1S?W_D_REAL$")>;
+def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LDFF1H$",
+ "^LDFF1S?H_[SD]$")>;
+def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDFF1[BWD]$",
+ "^LDFF1S?B_[HSD]$",
+ "^LDFF1S?W_D$")>;
// Contiguous non faulting load, scalar + imm
-def : InstRW<[V1Write_6c_1L01], (instregex "^LDNF1[BHWD]_IMM_REAL$",
- "^LDNF1S?B_[HSD]_IMM_REAL$",
- "^LDNF1S?H_[SD]_IMM_REAL$",
- "^LDNF1S?W_D_IMM_REAL$")>;
+def : InstRW<[V1Write_6c_1L01], (instregex "^LDNF1[BHWD]_IMM$",
+ "^LDNF1S?B_[HSD]_IMM$",
+ "^LDNF1S?H_[SD]_IMM$",
+ "^LDNF1S?W_D_IMM$")>;
// Contiguous Load two structures to two vectors, scalar + imm
def : InstRW<[V1Write_8c_2L01_2V01], (instregex "^LD2[BHWD]_IMM$")>;
@@ -1746,25 +1746,25 @@ def : InstRW<[V1Write_12c_4L01_4V01], (instregex "^LD4[BHWD]_IMM$")>;
def : InstRW<[V1Write_13c_4L01_2S_4V01], (instregex "^LD4[BHWD]$")>;
// Gather load, vector + imm, 32-bit element size
-def : InstRW<[V1Write_11c_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
- "^GLD(FF)?1W_IMM_REAL$")>;
+def : InstRW<[V1Write_11c_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
+ "^GLD(FF)?1W_IMM$")>;
// Gather load, vector + imm, 64-bit element size
def : InstRW<[V1Write_9c_2L_2V],
- (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
- "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?(SCALED_)?REAL$",
- "^GLD(FF)?1D_IMM_REAL$",
- "^GLD(FF)?1D_([SU]XTW_)?(SCALED_)?REAL$")>;
+ (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
+ "^GLD(FF)?1S?[BHW]_D(_[SU]XTW)?(_SCALED)?$",
+ "^GLD(FF)?1D_IMM$",
+ "^GLD(FF)?1D(_[SU]XTW)?(_SCALED)?$")>;
// Gather load, 32-bit scaled offset
def : InstRW<[V1Write_11c_2L_2V],
- (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
- "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
+ (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$",
+ "^GLD(FF)?1W_[SU]XTW_SCALED")>;
// Gather load, 32-bit unpacked unscaled offset
def : InstRW<[V1Write_9c_1L_1V],
- (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
- "^GLD(FF)?1W_[SU]XTW_REAL$")>;
+ (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
+ "^GLD(FF)?1W_[SU]XTW$")>;
// Prefetch
// NOTE: This is not specified in the SOG.
@@ -1848,12 +1848,12 @@ def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_IMM$",
// Read first fault register, unpredicated
// Set first fault register
// Write to first fault register
-def : InstRW<[V1Write_2c_1M0], (instrs RDFFR_P_REAL,
+def : InstRW<[V1Write_2c_1M0], (instrs RDFFR_P,
SETFFR,
WRFFR)>;
// Read first fault register, predicated
-def : InstRW<[V1Write_3c_2M0], (instrs RDFFR_PPz_REAL)>;
+def : InstRW<[V1Write_3c_2M0], (instrs RDFFR_PPz)>;
// Read first fault register and set flags
def : InstRW<[V1Write_4c_1M], (instrs RDFFRS_PPz)>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index e7de40fdf1deb0..807ce40bc5eac1 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -2635,24 +2635,24 @@ def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;
def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZR[IR]$")>;
// Non temporal gather load, vector + scalar 32-bit element size
-def : InstRW<[V2Write_9cyc_2L_4V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
- "^LDNT1S[BH]_ZZR_S_REAL$")>;
+def : InstRW<[V2Write_9cyc_2L_4V], (instregex "^LDNT1[BHW]_ZZR_S$",
+ "^LDNT1S[BH]_ZZR_S$")>;
// Non temporal gather load, vector + scalar 64-bit element size
-def : InstRW<[V2Write_9cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
-def : InstRW<[V2Write_9cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>;
+def : InstRW<[V2Write_9cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D$")>;
+def : InstRW<[V2Write_9cyc_2L_2V1], (instrs LDNT1D_ZZR_D)>;
// Contiguous first faulting load, scalar + scalar
-def : InstRW<[V2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$",
- "^LDFF1S?B_[HSD]_REAL$",
- "^LDFF1S?H_[SD]_REAL$",
- "^LDFF1S?W_D_REAL$")>;
+def : InstRW<[V2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]$",
+ "^LDFF1S?B_[HSD]$",
+ "^LDFF1S?H_[SD]$",
+ "^LDFF1S?W_D$")>;
// Contiguous non faulting load, scalar + imm
-def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$",
- "^LDNF1S?B_[HSD]_IMM_REAL$",
- "^LDNF1S?H_[SD]_IMM_REAL$",
- "^LDNF1S?W_D_IMM_REAL$")>;
+def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM$",
+ "^LDNF1S?B_[HSD]_IMM$",
+ "^LDNF1S?H_[SD]_IMM$",
+ "^LDNF1S?W_D_IMM$")>;
// Contiguous Load two structures to two vectors, scalar + imm
def : InstRW<[V2Write_8cyc_2L_2V], (instregex "^LD2[BHWD]_IMM$")>;
@@ -2673,33 +2673,33 @@ def : InstRW<[V2Write_9cyc_4L_8V], (instregex "^LD4[BHWD]_IMM$")>;
def : InstRW<[V2Write_10cyc_4L_8V_4S], (instregex "^LD4[BHWD]$")>;
// Gather load, vector + imm, 32-bit element size
-def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
- "^GLD(FF)?1W_IMM_REAL$")>;
+def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
+ "^GLD(FF)?1W_IMM$")>;
// Gather load, vector + imm, 64-bit element size
-def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
- "^GLD(FF)?1D_IMM_REAL$")>;
+def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
+ "^GLD(FF)?1D_IMM$")>;
// Gather load, 32-bit scaled offset
def : InstRW<[V2Write_10cyc_1L_8V],
- (instregex "^GLD(FF)?1S?H_S_[SU]XTW_SCALED_REAL$",
- "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
+ (instregex "^GLD(FF)?1S?H_S_[SU]XTW_SCALED$",
+ "^GLD(FF)?1W_[SU]XTW_SCALED")>;
// Gather load, 64-bit scaled offset
// NOTE: These instructions are not specified in the SOG.
def : InstRW<[V2Write_10cyc_1L_4V],
- (instregex "^GLD(FF)?1S?[HW]_D_([SU]XTW_)?SCALED_REAL$",
- "^GLD(FF)?1D_([SU]XTW_)?SCALED_REAL$")>;
+ (instregex "^GLD(FF)?1S?[HW]_D_([SU]XTW_)?SCALED$",
+ "^GLD(FF)?1D_([SU]XTW_)?SCALED$")>;
// Gather load, 32-bit unpacked unscaled offset
-def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
- "^GLD(FF)?1W_[SU]XTW_REAL$")>;
+def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
+ "^GLD(FF)?1W_[SU]XTW$")>;
// Gather load, 64-bit unpacked unscaled offset
// NOTE: These instructions are not specified in the SOG.
def : InstRW<[V2Write_9cyc_1L_2V],
- (instregex "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?REAL$",
- "^GLD(FF)?1D_([SU]XTW_)?REAL$")>;
+ (instregex "^GLD(FF)?1S?[BHW]_D(_[SU]XTW)?$",
+ "^GLD(FF)?1D(_[SU]XTW)?$")>;
// SVE Store instructions
// -----------------------------------------------------------------------------
@@ -2790,10 +2790,10 @@ def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D$",
// -----------------------------------------------------------------------------
// Read first fault register, unpredicated
-def : InstRW<[V2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>;
+def : InstRW<[V2Write_2cyc_1M0], (instrs RDFFR_P)>;
// Read first fault register, predicated
-def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>;
+def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs RDFFR_PPz)>;
// Read first fault register and set flags
def : InstRW<[V2Write_4or5cyc_2M0_2M], (instrs RDFFRS_PPz)>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 789ec817d3d8b8..58ba1ba0c190d6 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -6412,33 +6412,33 @@ class sve2_mem_sstnt_vs_base<bits<3> opc, string asm,
multiclass sve2_mem_sstnt_vs_32_ptrs<bits<3> opc, string asm,
SDPatternOperator op,
ValueType vt> {
- def _REAL : sve2_mem_sstnt_vs_base<opc, asm, Z_s, ZPR32>;
+ def NAME : sve2_mem_sstnt_vs_base<opc, asm, Z_s, ZPR32>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
- (!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), 0>;
+ (!cast<Instruction>(NAME) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
- (!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 0>;
+ (!cast<Instruction>(NAME) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
- (!cast<Instruction>(NAME # _REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 1>;
+ (!cast<Instruction>(NAME) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 1>;
def : Pat <(op (nxv4i32 ZPR32:$Zt), (nxv4i1 PPR3bAny:$Pg), (nxv4i32 ZPR32:$Zn), (i64 GPR64:$Rm), vt),
- (!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm)>;
+ (!cast<Instruction>(NAME) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm)>;
}
multiclass sve2_mem_sstnt_vs_64_ptrs<bits<3> opc, string asm,
SDPatternOperator op,
ValueType vt> {
- def _REAL : sve2_mem_sstnt_vs_base<opc, asm, Z_d, ZPR64>;
+ def NAME : sve2_mem_sstnt_vs_base<opc, asm, Z_d, ZPR64>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
- (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), 0>;
+ (!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
- (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 0>;
+ (!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
- (!cast<Instruction>(NAME # _REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>;
+ (!cast<Instruction>(NAME) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>;
def : Pat <(op (nxv2i64 ZPR64:$Zt), (nxv2i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64:$Rm), vt),
- (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
+ (!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
}
class sve_mem_sst_sv<bits<3> opc, bit xs, bit scaled, string asm,
@@ -6773,11 +6773,11 @@ multiclass sve_int_perm_punpk<bit opc, string asm, SDPatternOperator op> {
def : SVE_1_Op_Pat<nxv2i1, op, nxv4i1, !cast<Instruction>(NAME)>;
}
-class sve_int_rdffr_pred<bit s, string asm>
+class sve_int_rdffr_pred<bit s, string asm, SDPatternOperator op = null_frag>
: I<(outs PPR8:$Pd), (ins PPRAny:$Pg),
asm, "\t$Pd, $Pg/z",
"",
- []>, Sched<[]> {
+ [(set (nxv16i1 PPR8:$Pd), (op (nxv16i1 PPRAny:$Pg)))]>, Sched<[]> {
bits<4> Pd;
bits<4> Pg;
let Inst{31-23} = 0b001001010;
@@ -6792,22 +6792,11 @@ class sve_int_rdffr_pred<bit s, string asm>
let hasSideEffects = 1;
}
-multiclass sve_int_rdffr_pred<bit s, string asm, SDPatternOperator op> {
- def _REAL : sve_int_rdffr_pred<s, asm>;
-
- // We need a layer of indirection because early machine code passes balk at
- // physical register (i.e. FFR) uses that have no previous definition.
- let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
- def "" : Pseudo<(outs PPR8:$Pd), (ins PPRAny:$Pg), [(set (nxv16i1 PPR8:$Pd), (op (nxv16i1 PPRAny:$Pg)))]>,
- PseudoInstExpansion<(!cast<Instruction>(NAME # _REAL) PPR8:$Pd, PPRAny:$Pg)>;
- }
-}
-
-class sve_int_rdffr_unpred<string asm> : I<
+class sve_int_rdffr_unpred<string asm, SDPatternOperator op> : I<
(outs PPR8:$Pd), (ins),
asm, "\t$Pd",
"",
- []>, Sched<[]> {
+ [(set (nxv16i1 PPR8:$Pd), (op))]>, Sched<[]> {
bits<4> Pd;
let Inst{31-4} = 0b0010010100011001111100000000;
let Inst{3-0} = Pd;
@@ -6816,17 +6805,6 @@ class sve_int_rdffr_unpred<string asm> : I<
let hasSideEffects = 1;
}
-multiclass sve_int_rdffr_unpred<string asm, SDPatternOperator op> {
- def _REAL : sve_int_rdffr_unpred<asm>;
-
- // We need a layer of indirection because early machine code passes balk at
- // physical register (i.e. FFR) uses that have no previous definition.
- let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
- def "" : Pseudo<(outs PPR8:$Pd), (ins), [(set (nxv16i1 PPR8:$Pd), (op))]>,
- PseudoInstExpansion<(!cast<Instruction>(NAME # _REAL) PPR8:$Pd)>;
- }
-}
-
class sve_int_wrffr<string asm, SDPatternOperator op>
: I<(outs), (ins PPR8:$Pn),
asm, "\t$Pn",
@@ -7318,9 +7296,9 @@ class sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
let mayLoad = 1;
}
-multiclass sve_mem_cld_si<bits<4> dtype, string asm, RegisterOperand listty,
- ZPRRegOp zprty> {
- def "" : sve_mem_cld_si_base<dtype, 0, asm, listty>;
+multiclass sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
+ RegisterOperand listty, ZPRRegOp zprty> {
+ def NAME : sve_mem_cld_si_base<dtype, nf, asm, listty>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
(!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>;
@@ -7330,6 +7308,14 @@ multiclass sve_mem_cld_si<bits<4> dtype, string asm, RegisterOperand listty,
(!cast<Instruction>(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
}
+multiclass sve_mem_cld_si<bits<4> dtype, string asm, RegisterOperand listty,
+ ZPRRegOp zprty>
+: sve_mem_cld_si_base<dtype, 0, asm, listty, zprty>;
+
+multiclass sve_mem_cldnf_si<bits<4> dtype, string asm, RegisterOperand listty,
+ ZPRRegOp zprty>
+: sve_mem_cld_si_base<dtype, 1, asm, listty, zprty>;
+
class sve_mem_cldnt_si_base<bits<2> msz, string asm, RegisterOperand VecList>
: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4),
asm, "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]",
@@ -7518,7 +7504,7 @@ class sve_mem_cld_ss_base<bits<4> dtype, bit ff, dag iops, string asm,
multiclass sve_mem_cld_ss<bits<4> dtype, string asm, RegisterOperand listty,
ZPRRegOp zprty, RegisterOperand gprty> {
- def "" : sve_mem_cld_ss_base<dtype, 0, (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm),
+ def NAME : sve_mem_cld_ss_base<dtype, 0, (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm),
asm, listty>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Rm]",
@@ -7527,43 +7513,16 @@ multiclass sve_mem_cld_ss<bits<4> dtype, string asm, RegisterOperand listty,
multiclass sve_mem_cldff_ss<bits<4> dtype, string asm, RegisterOperand listty,
ZPRRegOp zprty, RegisterOperand gprty> {
- def _REAL : sve_mem_cld_ss_base<dtype, 1, (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm),
- asm, listty>;
+ def NAME : sve_mem_cld_ss_base<dtype, 1, (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), asm, listty>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Rm]",
- (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
-
- def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
- (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>;
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
- (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
-
- // We need a layer of indirection because early machine code passes balk at
- // physical register (i.e. FFR) uses that have no previous definition.
- let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
- def "" : Pseudo<(outs listty:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), []>,
- PseudoInstExpansion<(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm)>;
- }
-}
-
-multiclass sve_mem_cldnf_si<bits<4> dtype, string asm, RegisterOperand listty,
- ZPRRegOp zprty> {
- def _REAL : sve_mem_cld_si_base<dtype, 1, asm, listty>;
+ (!cast<Instruction>(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
- (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>;
- def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]",
- (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>;
- def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
- (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
-
- // We need a layer of indirection because early machine code passes balk at
- // physical register (i.e. FFR) uses that have no previous definition.
- let hasSideEffects = 1, hasNoSchedulingInfo = 1, mayLoad = 1 in {
- def "" : Pseudo<(outs listty:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), []>,
- PseudoInstExpansion<(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4)>;
- }
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
}
class sve_mem_eld_si<bits<2> sz, bits<3> nregs, RegisterOperand VecList,
@@ -7664,22 +7623,13 @@ multiclass sve_mem_32b_gld_sv_32_scaled<bits<4> opc, string asm,
RegisterOperand sxtw_opnd,
RegisterOperand uxtw_opnd,
ValueType vt> {
- def _UXTW_SCALED_REAL : sve_mem_32b_gld_sv<opc, 0, 1, asm, uxtw_opnd>;
- def _SXTW_SCALED_REAL : sve_mem_32b_gld_sv<opc, 1, 1, asm, sxtw_opnd>;
+ def _UXTW_SCALED : sve_mem_32b_gld_sv<opc, 0, 1, asm, uxtw_opnd>;
+ def _SXTW_SCALED : sve_mem_32b_gld_sv<opc, 1, 1, asm, sxtw_opnd>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
- (!cast<Instruction>(NAME # _UXTW_SCALED_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
+ (!cast<Instruction>(NAME # _UXTW_SCALED) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
- (!cast<Instruction>(NAME # _SXTW_SCALED_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
-
- // We need a layer of indirection because early machine code passes balk at
- // physical register (i.e. FFR) uses that have no previous definition.
- let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
- def _UXTW_SCALED : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>,
- PseudoInstExpansion<(!cast<Instruction>(NAME # _UXTW_SCALED_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>;
- def _SXTW_SCALED : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>,
- PseudoInstExpansion<(!cast<Instruction>(NAME # _SXTW_SCALED_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>;
- }
+ (!cast<Instruction>(NAME # _SXTW_SCALED) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
def : Pat<(nxv4i32 (uxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$indices), vt)),
(!cast<Instruction>(NAME # _UXTW_SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
@@ -7693,22 +7643,13 @@ multiclass sve_mem_32b_gld_vs_32_unscaled<bits<4> opc, string asm,
RegisterOperand sxtw_opnd,
RegisterOperand uxtw_opnd,
ValueType vt> {
- def _UXTW_REAL : sve_mem_32b_gld_sv<opc, 0, 0, asm, uxtw_opnd>;
- def _SXTW_REAL : sve_mem_32b_gld_sv<opc, 1, 0, asm, sxtw_opnd>;
+ def _UXTW : sve_mem_32b_gld_sv<opc, 0, 0, asm, uxtw_opnd>;
+ def _SXTW : sve_mem_32b_gld_sv<opc, 1, 0, asm, sxtw_opnd>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
- (!cast<Instruction>(NAME # _UXTW_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
+ (!cast<Instruction>(NAME # _UXTW) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
- (!cast<Instruction>(NAME # _SXTW_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
-
- // We need a layer of indirection because early machine code passes balk at
- // physical register (i.e. FFR) uses that have no previous definition.
- let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
- def _UXTW : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>,
- PseudoInstExpansion<(!cast<Instruction>(NAME # _UXTW_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>;
- def _SXTW : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>,
- PseudoInstExpansion<(!cast<Instruction>(NAME # _SXTW_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>;
- }
+ (!cast<Instruction>(NAME # _SXTW) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
def : Pat<(nxv4i32 (uxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt)),
(!cast<Instruction>(NAME # _UXTW) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
@@ -7745,21 +7686,14 @@ class sve_mem_32b_gld_vi<bits<4> opc, string asm, Operand imm_ty>
multiclass sve_mem_32b_gld_vi_32_ptrs<bits<4> opc, string asm, Operand imm_ty,
SDPatternOperator op, ValueType vt> {
- def _IMM_REAL : sve_mem_32b_gld_vi<opc, asm, imm_ty>;
+ def _IMM : sve_mem_32b_gld_vi<opc, asm, imm_ty>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
- (!cast<Instruction>(NAME # _IMM_REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 0>;
+ (!cast<Instruction>(NAME # _IMM) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $imm5]",
- (!cast<Instruction>(NAME # _IMM_REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), 0>;
+ (!cast<Instruction>(NAME # _IMM) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
- (!cast<Instruction>(NAME # _IMM_REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>;
-
- // We need a layer of indirection because early machine code passes balk at
- // physical register (i.e. FFR) uses that have no previous definition.
- let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
- def _IMM : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), []>,
- PseudoInstExpansion<(!cast<Instruction>(NAME # _IMM_REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5)>;
- }
+ (!cast<Instruction>(NAME # _IMM) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>;
def : Pat<(nxv4i32 (op (nxv4i1 PPR:$gp), (nxv4i32 ZPR:$ptrs), imm_ty:$index, vt)),
(!cast<Instruction>(NAME # _IMM) PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
@@ -7970,35 +7904,33 @@ class sve2_mem_gldnt_vs_base<bits<5> opc, dag iops, string asm,
multiclass sve2_mem_gldnt_vs_32_ptrs<bits<5> opc, string asm,
SDPatternOperator op,
ValueType vt> {
- def _REAL : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm),
- asm, Z_s>;
+ def NAME : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), asm, Z_s>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
- (!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), 0>;
+ (!cast<Instruction>(NAME) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
- (!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 0>;
+ (!cast<Instruction>(NAME) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
- (!cast<Instruction>(NAME # _REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 1>;
+ (!cast<Instruction>(NAME) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 1>;
def : Pat <(nxv4i32 (op (nxv4i1 PPR3bAny:$Pg), (nxv4i32 ZPR32:$Zd), (i64 GPR64:$Rm), vt)),
- (!cast<Instruction>(NAME # _REAL) PPR3bAny:$Pg, ZPR32:$Zd, GPR64:$Rm)>;
+ (!cast<Instruction>(NAME) PPR3bAny:$Pg, ZPR32:$Zd, GPR64:$Rm)>;
}
multiclass sve2_mem_gldnt_vs_64_ptrs<bits<5> opc, string asm,
SDPatternOperator op,
ValueType vt> {
- def _REAL : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm),
- asm, Z_d>;
+ def NAME : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), asm, Z_d>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
- (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), 0>;
+ (!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
- (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 0>;
+ (!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
- (!cast<Instruction>(NAME # _REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>;
+ (!cast<Instruction>(NAME) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>;
def : Pat <(nxv2i64 (op (nxv2i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zd), (i64 GPR64:$Rm), vt)),
- (!cast<Instruction>(NAME # _REAL) PPR3bAny:$Pg, ZPR64:$Zd, GPR64:$Rm)>;
+ (!cast<Instruction>(NAME) PPR3bAny:$Pg, ZPR64:$Zd, GPR64:$Rm)>;
}
//===----------------------------------------------------------------------===//
@@ -8042,22 +7974,13 @@ multiclass sve_mem_64b_gld_sv_32_scaled<bits<4> opc, string asm,
RegisterOperand sxtw_opnd,
RegisterOperand uxtw_opnd,
ValueType vt> {
- def _UXTW_SCALED_REAL : sve_mem_64b_gld_sv<opc, 0, 1, 0, asm, uxtw_opnd>;
- def _SXTW_SCALED_REAL : sve_mem_64b_gld_sv<opc, 1, 1, 0, asm, sxtw_opnd>;
+ def _UXTW_SCALED : sve_mem_64b_gld_sv<opc, 0, 1, 0, asm, uxtw_opnd>;
+ def _SXTW_SCALED : sve_mem_64b_gld_sv<opc, 1, 1, 0, asm, sxtw_opnd>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
- (!cast<Instruction>(NAME # _UXTW_SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
+ (!cast<Instruction>(NAME # _UXTW_SCALED) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
- (!cast<Instruction>(NAME # _SXTW_SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
-
- // We need a layer of indirection because early machine code passes balk at
- // physical register (i.e. FFR) uses that have no previous definition.
- let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
- def _UXTW_SCALED : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>,
- PseudoInstExpansion<(!cast<Instruction>(NAME # _UXTW_SCALED_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>;
- def _SXTW_SCALED : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>,
- PseudoInstExpansion<(!cast<Instruction>(NAME # _SXTW_SCALED_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>;
- }
+ (!cast<Instruction>(NAME # _SXTW_SCALED) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
def : Pat<(nxv2i64 (uxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt)),
(!cast<Instruction>(NAME # _UXTW_SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
@@ -8071,22 +7994,13 @@ multiclass sve_mem_64b_gld_vs_32_unscaled<bits<4> opc, string asm,
RegisterOperand sxtw_opnd,
RegisterOperand uxtw_opnd,
ValueType vt> {
- def _UXTW_REAL : sve_mem_64b_gld_sv<opc, 0, 0, 0, asm, uxtw_opnd>;
- def _SXTW_REAL : sve_mem_64b_gld_sv<opc, 1, 0, 0, asm, sxtw_opnd>;
+ def _UXTW : sve_mem_64b_gld_sv<opc, 0, 0, 0, asm, uxtw_opnd>;
+ def _SXTW : sve_mem_64b_gld_sv<opc, 1, 0, 0, asm, sxtw_opnd>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
- (!cast<Instruction>(NAME # _UXTW_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
+ (!cast<Instruction>(NAME # _UXTW) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
- (!cast<Instruction>(NAME # _SXTW_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
-
- // We need a layer of indirection because early machine code passes balk at
- // physical register (i.e. FFR) uses that have no previous definition.
- let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
- def _UXTW : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>,
- PseudoInstExpansion<(!cast<Instruction>(NAME # _UXTW_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>;
- def _SXTW : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>,
- PseudoInstExpansion<(!cast<Instruction>(NAME # _SXTW_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>;
- }
+ (!cast<Instruction>(NAME # _SXTW) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
def : Pat<(nxv2i64 (uxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt)),
(!cast<Instruction>(NAME # _UXTW) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
@@ -8097,17 +8011,10 @@ multiclass sve_mem_64b_gld_vs_32_unscaled<bits<4> opc, string asm,
multiclass sve_mem_64b_gld_sv2_64_scaled<bits<4> opc, string asm,
SDPatternOperator op,
RegisterOperand zprext, ValueType vt> {
- def _SCALED_REAL : sve_mem_64b_gld_sv<opc, 1, 1, 1, asm, zprext>;
+ def _SCALED : sve_mem_64b_gld_sv<opc, 1, 1, 1, asm, zprext>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
- (!cast<Instruction>(NAME # _SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>;
-
- // We need a layer of indirection because early machine code passes balk at
- // physical register (i.e. FFR) uses that have no previous definition.
- let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
- def _SCALED : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), []>,
- PseudoInstExpansion<(!cast<Instruction>(NAME # _SCALED_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm)>;
- }
+ (!cast<Instruction>(NAME # _SCALED) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>;
def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt)),
(!cast<Instruction>(NAME # _SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
@@ -8115,17 +8022,10 @@ multiclass sve_mem_64b_gld_sv2_64_scaled<bits<4> opc, string asm,
multiclass sve_mem_64b_gld_vs2_64_unscaled<bits<4> opc, string asm,
SDPatternOperator op, ValueType vt> {
- def _REAL : sve_mem_64b_gld_sv<opc, 1, 0, 1, asm, ZPR64ExtLSL8>;
+ def NAME : sve_mem_64b_gld_sv<opc, 1, 0, 1, asm, ZPR64ExtLSL8>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
- (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>;
-
- // We need a layer of indirection because early machine code passes balk at
- // physical register (i.e. FFR) uses that have no previous definition.
- let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
- def "" : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), []>,
- PseudoInstExpansion<(!cast<Instruction>(NAME # _REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm)>;
- }
+ (!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>;
def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt)),
(!cast<Instruction>(NAME) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
@@ -8158,21 +8058,14 @@ class sve_mem_64b_gld_vi<bits<4> opc, string asm, Operand imm_ty>
multiclass sve_mem_64b_gld_vi_64_ptrs<bits<4> opc, string asm, Operand imm_ty,
SDPatternOperator op, ValueType vt> {
- def _IMM_REAL : sve_mem_64b_gld_vi<opc, asm, imm_ty>;
+ def _IMM : sve_mem_64b_gld_vi<opc, asm, imm_ty>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
- (!cast<Instruction>(NAME # _IMM_REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 0>;
+ (!cast<Instruction>(NAME # _IMM) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $imm5]",
- (!cast<Instruction>(NAME # _IMM_REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), 0>;
+ (!cast<Instruction>(NAME # _IMM) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
- (!cast<Instruction>(NAME # _IMM_REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>;
-
- // We need a layer of indirection because early machine code passes balk at
- // physical register (i.e. FFR) uses that have no previous definition.
- let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
- def _IMM : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), []>,
- PseudoInstExpansion<(!cast<Instruction>(NAME # _IMM_REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5)>;
- }
+ (!cast<Instruction>(NAME # _IMM) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>;
def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), (nxv2i64 ZPR:$ptrs), imm_ty:$index, vt)),
(!cast<Instruction>(NAME # _IMM) PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-mask-opt.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-mask-opt.ll
index 29ad550c40d91b..e23151475014da 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-mask-opt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-mask-opt.ll
@@ -49,8 +49,8 @@ define void @masked_gather_v8i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1b { z0.d }, p0/z, [z0.d]
; VBITS_GE_256-NEXT: ld1b { z1.d }, p0/z, [z1.d]
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
-; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT: uzp1 v0.8b, v1.8b, v0.8b
; VBITS_GE_256-NEXT: str d0, [x0]
@@ -153,8 +153,8 @@ define void @masked_gather_v8i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1h { z0.d }, p0/z, [z0.d]
; VBITS_GE_256-NEXT: ld1h { z1.d }, p0/z, [z1.d]
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
-; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
; VBITS_GE_256-NEXT: str q1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
index 4ac0abcb851d44..92fce4584f6a97 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
@@ -66,7 +66,6 @@ define void @masked_gather_v8i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: zip2 v1.8b, v0.8b, v0.8b
; VBITS_GE_256-NEXT: zip1 v0.8b, v0.8b, v0.8b
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: shl v1.4h, v1.4h, #8
; VBITS_GE_256-NEXT: shl v0.4h, v0.4h, #8
; VBITS_GE_256-NEXT: sshr v1.4h, v1.4h, #8
@@ -76,14 +75,15 @@ define void @masked_gather_v8i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: cmpne p1.d, p0/z, z1.d, #0
+; VBITS_GE_256-NEXT: ld1b { z1.d }, p1/z, [z2.d]
+; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpne p0.d, p0/z, z0.d, #0
-; VBITS_GE_256-NEXT: ld1b { z0.d }, p1/z, [z2.d]
-; VBITS_GE_256-NEXT: ld1b { z1.d }, p0/z, [z3.d]
-; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
+; VBITS_GE_256-NEXT: ld1b { z0.d }, p0/z, [z2.d]
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
-; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
-; VBITS_GE_256-NEXT: uzp1 v0.8b, v1.8b, v0.8b
+; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
+; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_256-NEXT: uzp1 v0.8b, v0.8b, v1.8b
; VBITS_GE_256-NEXT: str d0, [x0]
; VBITS_GE_256-NEXT: ret
;
@@ -217,23 +217,23 @@ define void @masked_gather_v8i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: cmeq v0.8h, v0.8h, #0
-; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; VBITS_GE_256-NEXT: sunpklo z1.s, z0.h
+; VBITS_GE_256-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
-; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
-; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
-; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
-; VBITS_GE_256-NEXT: cmpne p1.d, p0/z, z0.d, #0
-; VBITS_GE_256-NEXT: ld1h { z0.d }, p1/z, [z3.d]
-; VBITS_GE_256-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
-; VBITS_GE_256-NEXT: ld1h { z1.d }, p0/z, [z2.d]
+; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
+; VBITS_GE_256-NEXT: cmpne p1.d, p0/z, z1.d, #0
+; VBITS_GE_256-NEXT: ld1h { z1.d }, p1/z, [z2.d]
+; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
+; VBITS_GE_256-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; VBITS_GE_256-NEXT: ld1h { z0.d }, p0/z, [z2.d]
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
-; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
-; VBITS_GE_256-NEXT: mov v0.d[1], v1.d[0]
-; VBITS_GE_256-NEXT: str q0, [x0]
+; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
+; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
+; VBITS_GE_256-NEXT: str q1, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_gather_v8i16:
@@ -353,22 +353,22 @@ define void @masked_gather_v8i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ptrue p2.d, vl4
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
-; VBITS_GE_256-NEXT: ld1d { z1.d }, p2/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT: ld1d { z2.d }, p2/z, [x1]
+; VBITS_GE_256-NEXT: ld1d { z1.d }, p2/z, [x1]
+; VBITS_GE_256-NEXT: ld1d { z2.d }, p2/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; VBITS_GE_256-NEXT: punpklo p3.h, p1.b
; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: punpklo p1.h, p1.b
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
+; VBITS_GE_256-NEXT: and p1.b, p3/z, p3.b, p2.b
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
-; VBITS_GE_256-NEXT: and p1.b, p1/z, p1.b, p2.b
-; VBITS_GE_256-NEXT: cmpne p2.d, p2/z, z0.d, #0
+; VBITS_GE_256-NEXT: ld1w { z1.d }, p1/z, [z1.d]
+; VBITS_GE_256-NEXT: cmpne p1.d, p2/z, z0.d, #0
; VBITS_GE_256-NEXT: ld1w { z0.d }, p1/z, [z2.d]
; VBITS_GE_256-NEXT: ptrue p1.s, vl4
-; VBITS_GE_256-NEXT: ld1w { z1.d }, p2/z, [z1.d]
-; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
-; VBITS_GE_256-NEXT: splice z0.s, p1, z0.s, z1.s
-; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0]
+; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
+; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
+; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_gather_v8i32:
@@ -500,15 +500,15 @@ define void @masked_gather_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
-; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
-; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
+; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
+; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0
-; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1]
-; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0
+; VBITS_GE_256-NEXT: ld1d { z0.d }, p1/z, [z1.d]
+; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
+; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z1.d, #0
; VBITS_GE_256-NEXT: ld1d { z1.d }, p1/z, [z2.d]
-; VBITS_GE_256-NEXT: ld1d { z0.d }, p2/z, [z0.d]
-; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0, x8, lsl #3]
-; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0]
+; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
+; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_gather_v8i64:
@@ -626,23 +626,23 @@ define void @masked_gather_v8f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: fcmeq v0.8h, v0.8h, #0.0
-; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; VBITS_GE_256-NEXT: sunpklo z2.s, z0.h
+; VBITS_GE_256-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
-; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
-; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
+; VBITS_GE_256-NEXT: sunpklo z2.d, z2.s
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
-; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
-; VBITS_GE_256-NEXT: cmpne p1.d, p0/z, z0.d, #0
-; VBITS_GE_256-NEXT: ld1h { z0.d }, p1/z, [z3.d]
-; VBITS_GE_256-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
-; VBITS_GE_256-NEXT: ld1h { z1.d }, p0/z, [z2.d]
+; VBITS_GE_256-NEXT: cmpne p1.d, p0/z, z2.d, #0
+; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
+; VBITS_GE_256-NEXT: ld1h { z1.d }, p1/z, [z1.d]
+; VBITS_GE_256-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; VBITS_GE_256-NEXT: ld1h { z0.d }, p0/z, [z2.d]
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
-; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
-; VBITS_GE_256-NEXT: mov v0.d[1], v1.d[0]
-; VBITS_GE_256-NEXT: str q0, [x0]
+; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
+; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
+; VBITS_GE_256-NEXT: str q1, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_gather_v8f16:
@@ -762,22 +762,22 @@ define void @masked_gather_v8f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ptrue p2.d, vl4
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
-; VBITS_GE_256-NEXT: ld1d { z1.d }, p2/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT: ld1d { z2.d }, p2/z, [x1]
+; VBITS_GE_256-NEXT: ld1d { z1.d }, p2/z, [x1]
+; VBITS_GE_256-NEXT: ld1d { z2.d }, p2/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
+; VBITS_GE_256-NEXT: punpklo p3.h, p1.b
; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: punpklo p1.h, p1.b
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
+; VBITS_GE_256-NEXT: and p1.b, p3/z, p3.b, p2.b
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
-; VBITS_GE_256-NEXT: and p1.b, p1/z, p1.b, p2.b
-; VBITS_GE_256-NEXT: cmpne p2.d, p2/z, z0.d, #0
+; VBITS_GE_256-NEXT: ld1w { z1.d }, p1/z, [z1.d]
+; VBITS_GE_256-NEXT: cmpne p1.d, p2/z, z0.d, #0
; VBITS_GE_256-NEXT: ld1w { z0.d }, p1/z, [z2.d]
; VBITS_GE_256-NEXT: ptrue p1.s, vl4
-; VBITS_GE_256-NEXT: ld1w { z1.d }, p2/z, [z1.d]
-; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
-; VBITS_GE_256-NEXT: splice z0.s, p1, z0.s, z1.s
-; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0]
+; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
+; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
+; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_gather_v8f32:
@@ -911,11 +911,11 @@ define void @masked_gather_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: fcmeq p1.d, p0/z, z0.d, #0.0
-; VBITS_GE_256-NEXT: fcmeq p2.d, p0/z, z1.d, #0.0
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: ld1d { z0.d }, p1/z, [z0.d]
-; VBITS_GE_256-NEXT: ld1d { z1.d }, p2/z, [z1.d]
+; VBITS_GE_256-NEXT: fcmeq p1.d, p0/z, z1.d, #0.0
+; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
+; VBITS_GE_256-NEXT: ld1d { z1.d }, p1/z, [z1.d]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll b/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
index bdaea0ecf144ad..5ff9f0f0df62f8 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
@@ -19,8 +19,8 @@ define i64 @scalable_int_min_max(ptr %arg, ptr %arg1, <vscale x 2 x ptr> %i37, <
; CHECK-NEXT: smax z4.d, z4.d, #0
; CHECK-NEXT: smin z4.d, p0/m, z4.d, z5.d
; CHECK-NEXT: cmpne p1.d, p0/z, z4.d, #0
-; CHECK-NEXT: ld1w { z4.d }, p1/z, [x1]
; CHECK-NEXT: ld1w { z0.d }, p1/z, [z0.d]
+; CHECK-NEXT: ld1w { z4.d }, p1/z, [x1]
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z4.s
; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z3.s
; CHECK-NEXT: add z0.d, z2.d, z1.d
diff --git a/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll b/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll
index a40d550852798e..4d2bc4bde13f5f 100644
--- a/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll
@@ -31,9 +31,9 @@ define <vscale x 2 x i64> @no_dag_combine_sext(<vscale x 2 x i1> %pg,
; CHECK-LABEL: no_dag_combine_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1b { z1.d }, p0/z, [z0.d, #16]
-; CHECK-NEXT: ptrue p2.d
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: movprfx z0, z1
-; CHECK-NEXT: sxtb z0.d, p2/m, z1.d
+; CHECK-NEXT: sxtb z0.d, p0/m, z1.d
; CHECK-NEXT: st1b { z1.d }, p1, [x0]
; CHECK-NEXT: ret
<vscale x 2 x i64> %base,
@@ -79,15 +79,15 @@ define <vscale x 16 x i8> @narrow_i64_gather_index_i8_zext(ptr %out, ptr %in, <v
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: add x8, x1, x2
; CHECK-NEXT: ld1b { z0.s }, p0/z, [x8, #3, mul vl]
-; CHECK-NEXT: ld1b { z1.s }, p0/z, [x1, x2]
-; CHECK-NEXT: ld1b { z2.s }, p0/z, [x8, #1, mul vl]
-; CHECK-NEXT: ld1b { z3.s }, p0/z, [x8, #2, mul vl]
+; CHECK-NEXT: ld1b { z1.s }, p0/z, [x8, #2, mul vl]
+; CHECK-NEXT: ld1b { z2.s }, p0/z, [x1, x2]
+; CHECK-NEXT: ld1b { z3.s }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT: ld1b { z0.s }, p0/z, [x1, z0.s, uxtw]
-; CHECK-NEXT: ld1b { z3.s }, p0/z, [x1, z3.s, uxtw]
; CHECK-NEXT: ld1b { z1.s }, p0/z, [x1, z1.s, uxtw]
; CHECK-NEXT: ld1b { z2.s }, p0/z, [x1, z2.s, uxtw]
-; CHECK-NEXT: uzp1 z0.h, z3.h, z0.h
-; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h
+; CHECK-NEXT: ld1b { z3.s }, p0/z, [x1, z3.s, uxtw]
+; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
+; CHECK-NEXT: uzp1 z1.h, z2.h, z3.h
; CHECK-NEXT: uzp1 z0.b, z1.b, z0.b
; CHECK-NEXT: ret
%1 = getelementptr inbounds i8, ptr %in, i64 %ptr
@@ -105,15 +105,15 @@ define <vscale x 16 x i8> @narrow_i64_gather_index_i8_sext(ptr %out, ptr %in, <v
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: add x8, x1, x2
; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x8, #3, mul vl]
-; CHECK-NEXT: ld1sb { z1.s }, p0/z, [x1, x2]
-; CHECK-NEXT: ld1sb { z2.s }, p0/z, [x8, #1, mul vl]
-; CHECK-NEXT: ld1sb { z3.s }, p0/z, [x8, #2, mul vl]
+; CHECK-NEXT: ld1sb { z1.s }, p0/z, [x8, #2, mul vl]
+; CHECK-NEXT: ld1sb { z2.s }, p0/z, [x1, x2]
+; CHECK-NEXT: ld1sb { z3.s }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT: ld1b { z0.s }, p0/z, [x1, z0.s, sxtw]
-; CHECK-NEXT: ld1b { z3.s }, p0/z, [x1, z3.s, sxtw]
; CHECK-NEXT: ld1b { z1.s }, p0/z, [x1, z1.s, sxtw]
; CHECK-NEXT: ld1b { z2.s }, p0/z, [x1, z2.s, sxtw]
-; CHECK-NEXT: uzp1 z0.h, z3.h, z0.h
-; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h
+; CHECK-NEXT: ld1b { z3.s }, p0/z, [x1, z3.s, sxtw]
+; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
+; CHECK-NEXT: uzp1 z1.h, z2.h, z3.h
; CHECK-NEXT: uzp1 z0.b, z1.b, z0.b
; CHECK-NEXT: ret
%1 = getelementptr inbounds i8, ptr %in, i64 %ptr
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-scaled-offsets.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-scaled-offsets.ll
index 9c9a8b10376d83..3ab0e3b8980f54 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-scaled-offsets.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-scaled-offsets.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s
;
; LDFF1H, LDFF1W, LDFF1D: base + 32-bit scaled offset, sign (sxtw) or zero (uxtw)
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-unscaled-offsets.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-unscaled-offsets.ll
index ab90115ee19936..e18f643788986f 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-unscaled-offsets.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-32bit-unscaled-offsets.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s
;
; LDFF1B, LDFF1W, LDFF1H, LDFF1D: base + 32-bit unscaled offset, sign (sxtw) or zero
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-scaled-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-scaled-offset.ll
index 049a446c1d7bdb..128f6bba1b6bad 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-scaled-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-scaled-offset.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s
;
; LDFF1H, LDFF1W, LDFF1D: base + 64-bit scaled offset
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-unscaled-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-unscaled-offset.ll
index d89394a30c5045..34dbcf3bef5ccb 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-unscaled-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-64bit-unscaled-offset.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s
;
; LDFF1B, LDFF1W, LDFF1H, LDFF1D: base + 64-bit unscaled offset
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-imm-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-imm-offset.ll
index c7f0cd2b57481c..ce2218a7b2f092 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-imm-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-imm-offset.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s
;
; LDFF1B, LDFF1W, LDFF1H, LDFF1D: vector base + immediate offset (index)
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-scalar-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-scalar-offset.ll
index 523249805e140e..8ed3680d5fec3f 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-scalar-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ff-gather-loads-vector-base-scalar-offset.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s
;
; LDFF1B, LDFF1W, LDFF1H, LDFF1D: vector base + scalar offset (index)
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll
index bc07c972e5fb80..627695e7c6eb35 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll
index bfff3f76fd7455..7e4e6176800725 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s
;
; LDFF1B
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
index ef24f575fba6a1..9a90f4e10fc835 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s
; Range testing for the immediate in the reg+imm(mulvl) addressing
; mode is done only for one instruction. The rest of the instructions
diff --git a/llvm/test/CodeGen/AArch64/sve-ldnf1.mir b/llvm/test/CodeGen/AArch64/sve-ldnf1.mir
index 991d997f3317a7..6d094259c55d99 100644
--- a/llvm/test/CodeGen/AArch64/sve-ldnf1.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ldnf1.mir
@@ -48,44 +48,44 @@ body: |
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
- ; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
- ; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
- ; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
- ; CHECK-NEXT: renamable $z0 = LDNF1B_D_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
- ; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
- ; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
- ; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
- ; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, $sp, 7 :: (load (s16) from %ir.object)
- ; CHECK-NEXT: renamable $z0 = LDNF1H_S_IMM renamable $p0, $sp, 7 :: (load (s16) from %ir.object)
- ; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, $sp, 7 :: (load (s16) from %ir.object)
- ; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, $sp, 7 :: (load (s16) from %ir.object)
- ; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, $sp, 7 :: (load (s16) from %ir.object)
- ; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, $sp, 7 :: (load (s32) from %ir.object, align 8)
- ; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, $sp, 7 :: (load (s32) from %ir.object, align 8)
- ; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, $sp, 7 :: (load (s32) from %ir.object, align 8)
- ; CHECK-NEXT: renamable $z0 = LDNF1D_IMM renamable $p0, $sp, 7 :: (load (s64) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, $sp, 7, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, $sp, 7, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, $sp, 7, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1B_D_IMM renamable $p0, $sp, 7, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, $sp, 7, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, $sp, 7, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, $sp, 7, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, $sp, 7, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1H_S_IMM renamable $p0, $sp, 7, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, $sp, 7, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, $sp, 7, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, $sp, 7, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, $sp, 7, implicit $ffr, implicit-def $ffr :: (load (s32) from %ir.object, align 8)
+ ; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, $sp, 7, implicit $ffr, implicit-def $ffr :: (load (s32) from %ir.object, align 8)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, $sp, 7, implicit $ffr, implicit-def $ffr :: (load (s32) from %ir.object, align 8)
+ ; CHECK-NEXT: renamable $z0 = LDNF1D_IMM renamable $p0, $sp, 7, implicit $ffr, implicit-def $ffr :: (load (s64) from %ir.object)
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
; CHECK-NEXT: RET_ReallyLR implicit $z0
- renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, 7 :: (load 4 from %ir.object, align 8)
- renamable $z0 = LDNF1W_D_IMM renamable $p0, %stack.1.object, 7 :: (load 4 from %ir.object, align 8)
- renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, 7 :: (load 4 from %ir.object, align 8)
- renamable $z0 = LDNF1D_IMM renamable $p0, %stack.1.object, 7 :: (load 8 from %ir.object, align 8)
+ renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, 7, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, 7, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, 7, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, 7, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, 7, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, 7, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, 7, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, 7, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, 7, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, 7, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, 7, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, 7, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, 7, implicit $ffr, implicit-def $ffr :: (load 4 from %ir.object, align 8)
+ renamable $z0 = LDNF1W_D_IMM renamable $p0, %stack.1.object, 7, implicit $ffr, implicit-def $ffr :: (load 4 from %ir.object, align 8)
+ renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, 7, implicit $ffr, implicit-def $ffr :: (load 4 from %ir.object, align 8)
+ renamable $z0 = LDNF1D_IMM renamable $p0, %stack.1.object, 7, implicit $ffr, implicit-def $ffr :: (load 8 from %ir.object, align 8)
RET_ReallyLR implicit $z0
...
@@ -107,44 +107,44 @@ body: |
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
- ; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
- ; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
- ; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
- ; CHECK-NEXT: renamable $z0 = LDNF1B_D_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
- ; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
- ; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
- ; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
- ; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object)
- ; CHECK-NEXT: renamable $z0 = LDNF1H_S_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object)
- ; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object)
- ; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object)
- ; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object)
- ; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, $sp, -8 :: (load (s32) from %ir.object, align 8)
- ; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, $sp, -8 :: (load (s32) from %ir.object, align 8)
- ; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, $sp, -8 :: (load (s32) from %ir.object, align 8)
- ; CHECK-NEXT: renamable $z0 = LDNF1D_IMM renamable $p0, $sp, -8 :: (load (s64) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, $sp, -8, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, $sp, -8, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, $sp, -8, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1B_D_IMM renamable $p0, $sp, -8, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, $sp, -8, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, $sp, -8, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, $sp, -8, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, $sp, -8, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1H_S_IMM renamable $p0, $sp, -8, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, $sp, -8, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, $sp, -8, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, $sp, -8, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, $sp, -8, implicit $ffr, implicit-def $ffr :: (load (s32) from %ir.object, align 8)
+ ; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, $sp, -8, implicit $ffr, implicit-def $ffr :: (load (s32) from %ir.object, align 8)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, $sp, -8, implicit $ffr, implicit-def $ffr :: (load (s32) from %ir.object, align 8)
+ ; CHECK-NEXT: renamable $z0 = LDNF1D_IMM renamable $p0, $sp, -8, implicit $ffr, implicit-def $ffr :: (load (s64) from %ir.object)
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
; CHECK-NEXT: RET_ReallyLR implicit $z0
- renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, -8 :: (load 4 from %ir.object, align 8)
- renamable $z0 = LDNF1W_D_IMM renamable $p0, %stack.1.object, -8 :: (load 4 from %ir.object, align 8)
- renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, -8 :: (load 4 from %ir.object, align 8)
- renamable $z0 = LDNF1D_IMM renamable $p0, %stack.1.object, -8 :: (load 8 from %ir.object, align 8)
+ renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, -8, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, -8, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, -8, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, -8, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, -8, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, -8, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, -8, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, -8, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, -8, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, -8, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, -8, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, -8, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, -8, implicit $ffr, implicit-def $ffr :: (load 4 from %ir.object, align 8)
+ renamable $z0 = LDNF1W_D_IMM renamable $p0, %stack.1.object, -8, implicit $ffr, implicit-def $ffr :: (load 4 from %ir.object, align 8)
+ renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, -8, implicit $ffr, implicit-def $ffr :: (load 4 from %ir.object, align 8)
+ renamable $z0 = LDNF1D_IMM renamable $p0, %stack.1.object, -8, implicit $ffr, implicit-def $ffr :: (load 8 from %ir.object, align 8)
RET_ReallyLR implicit $z0
...
@@ -167,56 +167,56 @@ body: |
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
- ; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, killed $x8, 7, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4
- ; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, killed $x8, 7, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 2
- ; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, killed $x8, 7, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 1
- ; CHECK-NEXT: renamable $z0 = LDNF1B_D_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1B_D_IMM renamable $p0, killed $x8, 7, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4
- ; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, killed $x8, 7, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 2
- ; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, killed $x8, 7, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 1
- ; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, killed $x8, 7, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
- ; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, killed $x8, 7, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4
- ; CHECK-NEXT: renamable $z0 = LDNF1H_S_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1H_S_IMM renamable $p0, killed $x8, 7, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 2
- ; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, killed $x8, 7, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4
- ; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, killed $x8, 7, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 2
- ; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, killed $x8, 7, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
- ; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, killed $x8, 7 :: (load (s32) from %ir.object, align 8)
+ ; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, killed $x8, 7, implicit $ffr, implicit-def $ffr :: (load (s32) from %ir.object, align 8)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4
- ; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, killed $x8, 7 :: (load (s32) from %ir.object, align 8)
+ ; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, killed $x8, 7, implicit $ffr, implicit-def $ffr :: (load (s32) from %ir.object, align 8)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4
- ; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, killed $x8, 7 :: (load (s32) from %ir.object, align 8)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, killed $x8, 7, implicit $ffr, implicit-def $ffr :: (load (s32) from %ir.object, align 8)
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
; CHECK-NEXT: RET_ReallyLR implicit $z0
- renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, 8 :: (load 4 from %ir.object, align 8)
- renamable $z0 = LDNF1W_D_IMM renamable $p0, %stack.1.object, 8 :: (load 4 from %ir.object, align 8)
- renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, 8 :: (load 4 from %ir.object, align 8)
+ renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, 8, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, 8, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, 8, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, 8, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, 8, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, 8, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, 8, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, 8, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, 8, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, 8, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, 8, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, 8, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, 8, implicit $ffr, implicit-def $ffr :: (load 4 from %ir.object, align 8)
+ renamable $z0 = LDNF1W_D_IMM renamable $p0, %stack.1.object, 8, implicit $ffr, implicit-def $ffr :: (load 4 from %ir.object, align 8)
+ renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, 8, implicit $ffr, implicit-def $ffr :: (load 4 from %ir.object, align 8)
RET_ReallyLR implicit $z0
...
@@ -239,55 +239,55 @@ body: |
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
- ; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, killed $x8, -8, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4
- ; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, killed $x8, -8, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -2
- ; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, killed $x8, -8, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -1
- ; CHECK-NEXT: renamable $z0 = LDNF1B_D_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1B_D_IMM renamable $p0, killed $x8, -8, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4
- ; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, killed $x8, -8, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -2
- ; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, killed $x8, -8, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -1
- ; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, killed $x8, -8, implicit $ffr, implicit-def $ffr :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
- ; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, killed $x8, -8, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4
- ; CHECK-NEXT: renamable $z0 = LDNF1H_S_IMM renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1H_S_IMM renamable $p0, killed $x8, -8, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -2
- ; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, killed $x8, -8, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4
- ; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, killed $x8, -8, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -2
- ; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, killed $x8, -8, implicit $ffr, implicit-def $ffr :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
- ; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, killed $x8, -8 :: (load (s32) from %ir.object, align 8)
+ ; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, killed $x8, -8, implicit $ffr, implicit-def $ffr :: (load (s32) from %ir.object, align 8)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4
- ; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, killed $x8, -8 :: (load (s32) from %ir.object, align 8)
+ ; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, killed $x8, -8, implicit $ffr, implicit-def $ffr :: (load (s32) from %ir.object, align 8)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4
- ; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, killed $x8, -8 :: (load (s32) from %ir.object, align 8)
+ ; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, killed $x8, -8, implicit $ffr, implicit-def $ffr :: (load (s32) from %ir.object, align 8)
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
; CHECK-NEXT: RET_ReallyLR implicit $z0
- renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
- renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2)
- renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, -9 :: (load 4 from %ir.object, align 8)
- renamable $z0 = LDNF1W_D_IMM renamable $p0, %stack.1.object, -9 :: (load 4 from %ir.object, align 8)
- renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, -9 :: (load 4 from %ir.object, align 8)
+ renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, -9, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, -9, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, -9, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, -9, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, -9, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, -9, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, -9, implicit $ffr, implicit-def $ffr :: (load 1 from %ir.object, align 2)
+ renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, -9, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, -9, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, -9, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, -9, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, -9, implicit $ffr, implicit-def $ffr :: (load 2 from %ir.object, align 2)
+ renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, -9, implicit $ffr, implicit-def $ffr :: (load 4 from %ir.object, align 8)
+ renamable $z0 = LDNF1W_D_IMM renamable $p0, %stack.1.object, -9, implicit $ffr, implicit-def $ffr :: (load 4 from %ir.object, align 8)
+ renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, -9, implicit $ffr, implicit-def $ffr :: (load 4 from %ir.object, align 8)
RET_ReallyLR implicit $z0
...
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
index 0d06c21abfd2ee..3ecfa7e6c0b186 100644
--- a/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
@@ -113,8 +113,8 @@ define <vscale x 8 x half> @masked_gather_nxv8f16(<vscale x 8 x ptr> %ptrs, <vsc
; CHECK-NEXT: ld1h { z2.d }, p1/z, [z2.d]
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s
; CHECK-NEXT: ld1h { z1.d }, p1/z, [z1.d]
+; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-rdffr.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-rdffr.mir
index 082781d8b05673..86383ea5331800 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-rdffr.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-rdffr.mir
@@ -11,7 +11,7 @@ body: |
; CHECK: RDFFRS_PPz
; CHECK-NOT: PTEST
- %1:ppr_3b = RDFFR_PPz %0:ppr_3b
+ %1:ppr_3b = RDFFR_PPz %0:ppr_3b, implicit $ffr
PTEST_PP killed %0:ppr_3b, killed %1:ppr_3b, implicit-def $nzcv
; Consume nzcv
@@ -32,7 +32,7 @@ body: |
; CHECK: RDFFR_PPz
; CHECK: PTEST
- %2:ppr_3b = RDFFR_PPz %0:ppr_3b
+ %2:ppr_3b = RDFFR_PPz %0:ppr_3b, implicit $ffr
PTEST_PP killed %1:ppr_3b, killed %2:ppr_3b, implicit-def $nzcv
; Consume nzcv
@@ -53,7 +53,7 @@ body: |
; CHECK: RDFFR_PPz
; CHECK-NEXT: ADDSXrr
; CHECK-NEXT: PTEST_PP
- %1:ppr_3b = RDFFR_PPz %0:ppr_3b
+ %1:ppr_3b = RDFFR_PPz %0:ppr_3b, implicit $ffr
; Clobber nzcv
$x0 = ADDSXrr $x0, $x0, implicit-def $nzcv
PTEST_PP killed %0:ppr_3b, killed %1:ppr_3b, implicit-def $nzcv
@@ -78,7 +78,7 @@ body: |
; CHECK: RDFFR_PPz
; CHECK-NEXT: CSINCWr
; CHECK-NEXT: PTEST_PP
- %1:ppr_3b = RDFFR_PPz %0:ppr_3b
+ %1:ppr_3b = RDFFR_PPz %0:ppr_3b, implicit $ffr
; Consume nzcv
%2:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
PTEST_PP killed %0:ppr_3b, killed %1:ppr_3b, implicit-def $nzcv
diff --git a/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
index f915e1eaf07f0e..4e6ac997621b38 100644
--- a/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
@@ -4240,10 +4240,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 0.50 rbit z0.d, p7/m, z31.d
# CHECK-NEXT: 1 4 0.50 rbit z0.h, p7/m, z31.h
# CHECK-NEXT: 1 4 0.50 rbit z0.s, p7/m, z31.s
-# CHECK-NEXT: 1 3 1.00 U rdffr p0.b
-# CHECK-NEXT: 1 3 1.00 U rdffr p0.b, p0/z
-# CHECK-NEXT: 1 3 1.00 U rdffr p15.b
-# CHECK-NEXT: 1 3 1.00 U rdffr p15.b, p15/z
+# CHECK-NEXT: 1 3 1.00 * U rdffr p0.b
+# CHECK-NEXT: 1 3 1.00 * U rdffr p0.b, p0/z
+# CHECK-NEXT: 1 3 1.00 * U rdffr p15.b
+# CHECK-NEXT: 1 3 1.00 * U rdffr p15.b, p15/z
# CHECK-NEXT: 1 3 1.00 U rdffrs p0.b, p0/z
# CHECK-NEXT: 1 3 1.00 U rdffrs p15.b, p15/z
# CHECK-NEXT: 1 1 0.50 rdvl x0, #0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-sve-instructions.s
index 867bb10e7d2574..a8fb8b669838fa 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-sve-instructions.s
@@ -5369,10 +5369,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 3 0.50 rbit z0.d, p7/m, z31.d
# CHECK-NEXT: 1 3 0.50 rbit z0.h, p7/m, z31.h
# CHECK-NEXT: 1 3 0.50 rbit z0.s, p7/m, z31.s
-# CHECK-NEXT: 1 1 0.33 U rdffr p0.b
-# CHECK-NEXT: 1 3 1.00 U rdffr p0.b, p0/z
-# CHECK-NEXT: 1 1 0.33 U rdffr p15.b
-# CHECK-NEXT: 1 3 1.00 U rdffr p15.b, p15/z
+# CHECK-NEXT: 1 1 0.33 * U rdffr p0.b
+# CHECK-NEXT: 1 3 1.00 * U rdffr p0.b, p0/z
+# CHECK-NEXT: 1 1 0.33 * U rdffr p15.b
+# CHECK-NEXT: 1 3 1.00 * U rdffr p15.b, p15/z
# CHECK-NEXT: 1 3 1.00 U rdffrs p0.b, p0/z
# CHECK-NEXT: 1 3 1.00 U rdffrs p15.b, p15/z
# CHECK-NEXT: 1 1 0.33 rdvl x0, #0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
index 0f4003671a36c7..99e39567b1ad61 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
@@ -5369,10 +5369,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 rbit z0.d, p7/m, z31.d
# CHECK-NEXT: 1 2 0.50 rbit z0.h, p7/m, z31.h
# CHECK-NEXT: 1 2 0.50 rbit z0.s, p7/m, z31.s
-# CHECK-NEXT: 1 2 2.00 U rdffr p0.b
-# CHECK-NEXT: 2 3 1.00 U rdffr p0.b, p0/z
-# CHECK-NEXT: 1 2 2.00 U rdffr p15.b
-# CHECK-NEXT: 2 3 1.00 U rdffr p15.b, p15/z
+# CHECK-NEXT: 1 2 2.00 * U rdffr p0.b
+# CHECK-NEXT: 2 3 1.00 * U rdffr p0.b, p0/z
+# CHECK-NEXT: 1 2 2.00 * U rdffr p15.b
+# CHECK-NEXT: 2 3 1.00 * U rdffr p15.b, p15/z
# CHECK-NEXT: 4 4 2.00 U rdffrs p0.b, p0/z
# CHECK-NEXT: 4 4 2.00 U rdffrs p15.b, p15/z
# CHECK-NEXT: 1 2 0.50 rdvl x0, #0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s
index a9a5b8e22ee7a8..efa81b0ffcd48b 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s
@@ -4057,10 +4057,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 rbit z0.d, p7/m, z31.d
# CHECK-NEXT: 1 2 0.50 rbit z0.h, p7/m, z31.h
# CHECK-NEXT: 1 2 0.50 rbit z0.s, p7/m, z31.s
-# CHECK-NEXT: 1 2 1.00 U rdffr p0.b
-# CHECK-NEXT: 2 3 2.00 U rdffr p0.b, p0/z
-# CHECK-NEXT: 1 2 1.00 U rdffr p15.b
-# CHECK-NEXT: 2 3 2.00 U rdffr p15.b, p15/z
+# CHECK-NEXT: 1 2 1.00 * U rdffr p0.b
+# CHECK-NEXT: 2 3 2.00 * U rdffr p0.b, p0/z
+# CHECK-NEXT: 1 2 1.00 * U rdffr p15.b
+# CHECK-NEXT: 2 3 2.00 * U rdffr p15.b, p15/z
# CHECK-NEXT: 1 4 0.50 U rdffrs p0.b, p0/z
# CHECK-NEXT: 1 4 0.50 U rdffrs p15.b, p15/z
# CHECK-NEXT: 1 2 1.00 rdvl x0, #0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-sve-instructions.s
index c8d4640afb6434..4d6ce706b05274 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-sve-instructions.s
@@ -5374,10 +5374,10 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.25 rbit z0.d, p7/m, z31.d
# CHECK-NEXT: 1 2 0.25 rbit z0.h, p7/m, z31.h
# CHECK-NEXT: 1 2 0.25 rbit z0.s, p7/m, z31.s
-# CHECK-NEXT: 1 2 1.00 U rdffr p0.b
-# CHECK-NEXT: 2 4 1.00 U rdffr p0.b, p0/z
-# CHECK-NEXT: 1 2 1.00 U rdffr p15.b
-# CHECK-NEXT: 2 4 1.00 U rdffr p15.b, p15/z
+# CHECK-NEXT: 1 2 1.00 * U rdffr p0.b
+# CHECK-NEXT: 2 4 1.00 * U rdffr p0.b, p0/z
+# CHECK-NEXT: 1 2 1.00 * U rdffr p15.b
+# CHECK-NEXT: 2 4 1.00 * U rdffr p15.b, p15/z
# CHECK-NEXT: 4 5 2.00 U rdffrs p0.b, p0/z
# CHECK-NEXT: 4 5 2.00 U rdffrs p15.b, p15/z
# CHECK-NEXT: 1 2 0.50 rdvl x0, #0