[llvm] [AArch64] Hint regalloc to choose distinct predicate for MATCH/CMP (PR #190139)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 2 02:36:49 PDT 2026
https://github.com/sdesmalen-arm created https://github.com/llvm/llvm-project/pull/190139
For some cores it is preferable to choose a destination register that does not match the governing predicate for the CMP<> and MATCH instructions.
The hint is conservative in that it tries not to pick a callee-save register if it's not already used/allocated for other purposes, as that would introduce new spills/fills. Note that this might be preferable if e.g. the CMP/MATCH instruction is executed in a loop, but it might also be less preferable for small functions that have an SVE interface (p4-p15 are callee-saved).
It is enabled for all cores by default, but it can be disabled by adding the `disable-distinct-dst-reg-cmp-match` feature. This feature can also be added to specific cores if this behaviour is undesirable.
>From e52d324b1c0302f5a8dbd1e279e5213cdc1d8390 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Tue, 24 Mar 2026 10:36:26 +0000
Subject: [PATCH] [AArch64] Hint regalloc to choose distinct predicate for
MATCH/CMP
For some cores it is preferable to choose a destination register that does
not match the governing predicate for the CMP<> and MATCH instructions.
The hint is conservative in that it tries not to pick a callee-save
register if it's not already used/allocated for other purposes, as that
would introduce new spills/fills. Note that this might be preferable
if e.g. the CMP/MATCH instruction is executed in a loop, but it might
also be less preferable for small functions that have an SVE interface
(p4-p15 are callee-saved).
It is enabled for all cores by default, but it can be disabled by adding
the `disable-distinct-dst-reg-cmp-match` feature. This feature can also
be added to specific cores if this behaviour is undesirable.
---
llvm/lib/Target/AArch64/AArch64Features.td | 10 +
.../Target/AArch64/AArch64RegisterInfo.cpp | 162 +++++++-
llvm/test/CodeGen/AArch64/active_lane_mask.ll | 20 +-
.../CodeGen/AArch64/combine-storetomstore.ll | 112 ++---
...rleaving-reductions-predicated-scalable.ll | 18 +-
.../AArch64/dag-combine-concat-vectors.ll | 4 +-
.../CodeGen/AArch64/extract-vector-elt-sve.ll | 24 +-
.../AArch64/intrinsic-cttz-elts-sve.ll | 6 +-
.../AArch64/intrinsic-vector-match-sve2.ll | 28 +-
llvm/test/CodeGen/AArch64/rcpc3-sve.ll | 4 +-
...ate-sm-changing-call-disable-coalescing.ll | 4 +-
llvm/test/CodeGen/AArch64/sve-cmp-select.ll | 12 +-
llvm/test/CodeGen/AArch64/sve-fcvt.ll | 16 +-
.../AArch64/sve-fixed-length-fp-select.ll | 66 +--
.../sve-fixed-length-insert-vector-elt.ll | 60 +--
.../AArch64/sve-fixed-length-int-select.ll | 88 ++--
.../AArch64/sve-fixed-length-int-vselect.ll | 80 ++--
.../sve-fixed-length-masked-128bit-loads.ll | 22 +-
.../sve-fixed-length-masked-128bit-stores.ll | 16 +-
.../AArch64/sve-fixed-length-masked-gather.ll | 100 ++---
.../AArch64/sve-fixed-length-masked-loads.ll | 116 +++---
.../sve-fixed-length-masked-scatter.ll | 128 +++---
.../AArch64/sve-fixed-length-masked-stores.ll | 102 ++---
.../AArch64/sve-fixed-length-subvector.ll | 4 +-
.../CodeGen/AArch64/sve-insert-element.ll | 64 +--
.../AArch64/sve-intrinsics-int-compares.ll | 8 +-
llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll | 16 +-
.../CodeGen/AArch64/sve-load-compare-store.ll | 4 +-
.../AArch64/sve-masked-compressstore.ll | 46 +--
.../AArch64/sve-match-cmp-predicate.ll | 205 +++++++++
.../AArch64/sve-nontemporal-masked-ldst.ll | 8 +-
.../test/CodeGen/AArch64/sve-pred-selectop.ll | 228 +++++-----
.../CodeGen/AArch64/sve-pred-selectop2.ll | 388 +++++++++---------
.../CodeGen/AArch64/sve-pred-selectop3.ll | 260 ++++++------
.../AArch64/sve-ptest-removal-cmpeq.ll | 12 +-
.../AArch64/sve-ptest-removal-cmpge.ll | 12 +-
.../AArch64/sve-ptest-removal-cmpgt.ll | 12 +-
.../AArch64/sve-ptest-removal-cmphi.ll | 12 +-
.../AArch64/sve-ptest-removal-cmphs.ll | 12 +-
.../AArch64/sve-ptest-removal-cmple.ll | 72 ++--
.../AArch64/sve-ptest-removal-cmplo.ll | 8 +-
.../AArch64/sve-ptest-removal-cmpls.ll | 8 +-
.../AArch64/sve-ptest-removal-cmplt.ll | 8 +-
.../AArch64/sve-ptest-removal-cmpne.ll | 12 +-
.../AArch64/sve-ptest-removal-match.ll | 4 +-
.../CodeGen/AArch64/sve-punpklo-combine.ll | 48 +--
.../AArch64/sve-regalloc-hint-match-cmp.mir | 64 +++
llvm/test/CodeGen/AArch64/sve-scmp.ll | 28 +-
llvm/test/CodeGen/AArch64/sve-select.ll | 4 +-
llvm/test/CodeGen/AArch64/sve-setcc.ll | 4 +-
llvm/test/CodeGen/AArch64/sve-smulo-sdnode.ll | 56 +--
.../CodeGen/AArch64/sve-split-insert-elt.ll | 12 +-
...e-streaming-mode-fixed-length-fp-select.ll | 42 +-
...-streaming-mode-fixed-length-fp-vselect.ll | 24 +-
...ing-mode-fixed-length-insert-vector-elt.ll | 84 ++--
...treaming-mode-fixed-length-int-compares.ll | 84 ++--
...eaming-mode-fixed-length-int-immediates.ll | 16 +-
...-streaming-mode-fixed-length-int-select.ll | 64 +--
...streaming-mode-fixed-length-int-vselect.ll | 56 +--
...mode-fixed-length-masked-gather-scatter.ll | 4 +-
...streaming-mode-fixed-length-masked-load.ll | 60 +--
...treaming-mode-fixed-length-masked-store.ll | 48 +--
llvm/test/CodeGen/AArch64/sve-trunc.ll | 8 +-
llvm/test/CodeGen/AArch64/sve-ucmp.ll | 28 +-
llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll | 56 +--
.../CodeGen/AArch64/sve-vector-compress.ll | 28 +-
66 files changed, 1927 insertions(+), 1492 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/sve-match-cmp-predicate.ll
create mode 100644 llvm/test/CodeGen/AArch64/sve-regalloc-hint-match-cmp.mir
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 4fbad6643d77a..7447fe7afbc68 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -911,6 +911,16 @@ def FeatureDisableFastIncVL : SubtargetFeature<"disable-fast-inc-vl",
"HasDisableFastIncVL", "true",
"Do not prefer INC/DEC, ALL, { 1, 2, 4 } over ADDVL">;
+// For some cores it is preferable to choose a destination register that does
+// not match the governing predicate for the CMP<> and MATCH instructions. When
+// enabled the register allocator tries to use a distinct register. While the
+// algorithm tries to avoid introducing callee-save spill/fills and therefore
+// shouldn't have any adverse effects on cores where using distinct registers
+// is not necessarily preferable, we still provide a way to turn the feature off.
+def FeatureDisableDistinctDstRegCmpMatch : SubtargetFeature<
+ "disable-distinct-dst-reg-cmp-match", "UseDistinctDstRegCmpMatch",
+ "false", "Do not use distinct destination register for CMP/MATCH instructions">;
+
// On most processors we want to avoid moving from WZR to vector registers
// (relying on materializing 0 to a FPR and moving from there instead),
// but on some (in-order) cores it's preferable to avoid the extra instruction instead.
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 8c0dd4381fae8..0518bfb0cd44f 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -1114,6 +1114,159 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
+static bool requiresMatchCmpRegallocHint(unsigned Opc) {
+ switch (Opc) {
+ default:
+ return false;
+ // All match instructions
+ case AArch64::MATCH_PPzZZ_B:
+ case AArch64::MATCH_PPzZZ_H:
+ // Vector compare instructions (CMPL* are aliases of CMPG*/H*)
+ case AArch64::CMPEQ_PPzZZ_B:
+ case AArch64::CMPEQ_PPzZZ_H:
+ case AArch64::CMPEQ_PPzZZ_S:
+ case AArch64::CMPEQ_PPzZZ_D:
+ case AArch64::CMPNE_PPzZZ_B:
+ case AArch64::CMPNE_PPzZZ_H:
+ case AArch64::CMPNE_PPzZZ_S:
+ case AArch64::CMPNE_PPzZZ_D:
+ case AArch64::CMPGE_PPzZZ_B:
+ case AArch64::CMPGE_PPzZZ_H:
+ case AArch64::CMPGE_PPzZZ_S:
+ case AArch64::CMPGE_PPzZZ_D:
+ case AArch64::CMPHS_PPzZZ_B:
+ case AArch64::CMPHS_PPzZZ_H:
+ case AArch64::CMPHS_PPzZZ_S:
+ case AArch64::CMPHS_PPzZZ_D:
+ case AArch64::CMPGT_PPzZZ_B:
+ case AArch64::CMPGT_PPzZZ_H:
+ case AArch64::CMPGT_PPzZZ_S:
+ case AArch64::CMPGT_PPzZZ_D:
+ case AArch64::CMPHI_PPzZZ_B:
+ case AArch64::CMPHI_PPzZZ_H:
+ case AArch64::CMPHI_PPzZZ_S:
+ case AArch64::CMPHI_PPzZZ_D:
+ // Vector/immediate compare instructions
+ case AArch64::CMPEQ_PPzZI_B:
+ case AArch64::CMPEQ_PPzZI_H:
+ case AArch64::CMPEQ_PPzZI_S:
+ case AArch64::CMPEQ_PPzZI_D:
+ case AArch64::CMPNE_PPzZI_B:
+ case AArch64::CMPNE_PPzZI_H:
+ case AArch64::CMPNE_PPzZI_S:
+ case AArch64::CMPNE_PPzZI_D:
+ case AArch64::CMPGE_PPzZI_B:
+ case AArch64::CMPGE_PPzZI_H:
+ case AArch64::CMPGE_PPzZI_S:
+ case AArch64::CMPGE_PPzZI_D:
+ case AArch64::CMPHS_PPzZI_B:
+ case AArch64::CMPHS_PPzZI_H:
+ case AArch64::CMPHS_PPzZI_S:
+ case AArch64::CMPHS_PPzZI_D:
+ case AArch64::CMPGT_PPzZI_B:
+ case AArch64::CMPGT_PPzZI_H:
+ case AArch64::CMPGT_PPzZI_S:
+ case AArch64::CMPGT_PPzZI_D:
+ case AArch64::CMPHI_PPzZI_B:
+ case AArch64::CMPHI_PPzZI_H:
+ case AArch64::CMPHI_PPzZI_S:
+ case AArch64::CMPHI_PPzZI_D:
+ case AArch64::CMPLE_PPzZI_B:
+ case AArch64::CMPLE_PPzZI_H:
+ case AArch64::CMPLE_PPzZI_S:
+ case AArch64::CMPLE_PPzZI_D:
+ case AArch64::CMPLS_PPzZI_B:
+ case AArch64::CMPLS_PPzZI_H:
+ case AArch64::CMPLS_PPzZI_S:
+ case AArch64::CMPLS_PPzZI_D:
+ case AArch64::CMPLT_PPzZI_B:
+ case AArch64::CMPLT_PPzZI_H:
+ case AArch64::CMPLT_PPzZI_S:
+ case AArch64::CMPLT_PPzZI_D:
+ case AArch64::CMPLO_PPzZI_B:
+ case AArch64::CMPLO_PPzZI_H:
+ case AArch64::CMPLO_PPzZI_S:
+ case AArch64::CMPLO_PPzZI_D:
+ // Wide-vector compare instructions
+ case AArch64::CMPEQ_WIDE_PPzZZ_B:
+ case AArch64::CMPEQ_WIDE_PPzZZ_H:
+ case AArch64::CMPEQ_WIDE_PPzZZ_S:
+ case AArch64::CMPNE_WIDE_PPzZZ_B:
+ case AArch64::CMPNE_WIDE_PPzZZ_H:
+ case AArch64::CMPNE_WIDE_PPzZZ_S:
+ case AArch64::CMPGE_WIDE_PPzZZ_B:
+ case AArch64::CMPGE_WIDE_PPzZZ_H:
+ case AArch64::CMPGE_WIDE_PPzZZ_S:
+ case AArch64::CMPHS_WIDE_PPzZZ_B:
+ case AArch64::CMPHS_WIDE_PPzZZ_H:
+ case AArch64::CMPHS_WIDE_PPzZZ_S:
+ case AArch64::CMPGT_WIDE_PPzZZ_B:
+ case AArch64::CMPGT_WIDE_PPzZZ_H:
+ case AArch64::CMPGT_WIDE_PPzZZ_S:
+ case AArch64::CMPHI_WIDE_PPzZZ_B:
+ case AArch64::CMPHI_WIDE_PPzZZ_H:
+ case AArch64::CMPHI_WIDE_PPzZZ_S:
+ case AArch64::CMPLE_WIDE_PPzZZ_B:
+ case AArch64::CMPLE_WIDE_PPzZZ_H:
+ case AArch64::CMPLE_WIDE_PPzZZ_S:
+ case AArch64::CMPLS_WIDE_PPzZZ_B:
+ case AArch64::CMPLS_WIDE_PPzZZ_H:
+ case AArch64::CMPLS_WIDE_PPzZZ_S:
+ case AArch64::CMPLT_WIDE_PPzZZ_B:
+ case AArch64::CMPLT_WIDE_PPzZZ_H:
+ case AArch64::CMPLT_WIDE_PPzZZ_S:
+ case AArch64::CMPLO_WIDE_PPzZZ_B:
+ case AArch64::CMPLO_WIDE_PPzZZ_H:
+ case AArch64::CMPLO_WIDE_PPzZZ_S:
+ return true;
+ }
+}
+
+static bool HandleMatchCmpPredicateHint(Register VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const VirtRegMap *VRM,
+ const MachineRegisterInfo &MRI,
+ const AArch64Subtarget &ST,
+ const LiveRegMatrix *Matrix) {
+ const TargetRegisterClass *RegRC = MRI.getRegClass(VirtReg);
+ if (!ST.useDistinctDstRegCmpMatch() ||
+ !AArch64::PPRRegClass.hasSubClassEq(RegRC) || !MRI.hasOneDef(VirtReg) ||
+ Order.size() < 2)
+ return false;
+
+ const MachineInstr *DefInst = MRI.getOneDef(VirtReg)->getParent();
+ if (!requiresMatchCmpRegallocHint(DefInst->getOpcode()))
+ return false;
+
+ Register Op1Reg = DefInst->getOperand(1).getReg();
+ if (Op1Reg.isVirtual())
+ Op1Reg = VRM->getPhys(Op1Reg);
+
+ // If no register is allocated for the general-predicate, it's not yet
+ // possible to choose a distinct register.
+ if (!Op1Reg.isValid())
+ return false;
+
+ // Move Op1Reg as the least preferred register.
+ //
+ // This might result in callee-save spills when the function takes/returns
+ // arguments in SVE registers (i.e. needs to preserve p4-p15) and can't reuse
+ // p0-p3. That's why we limit it to non-callee saved registers or to
+ // callee-saved registers that have already been allocated for other uses in
+ // the function.
+ DenseSet<unsigned> CSRs;
+ for (unsigned I = 0; MRI.getCalleeSavedRegs()[I]; ++I)
+ CSRs.insert(MRI.getCalleeSavedRegs()[I]);
+
+ Hints.append(Order.begin(), Order.end());
+ llvm::stable_sort(Hints, [&](Register A, Register B) {
+ return B == Op1Reg &&
+ (!CSRs.contains(A) || !MRI.def_empty(A) || Matrix->isPhysRegUsed(A));
+ });
+ return true;
+}
+
// We add regalloc hints for different cases:
// * Choosing a better destination operand for predicated SVE instructions
// where the inactive lanes are undef, by choosing a register that is not
@@ -1143,14 +1296,14 @@ bool AArch64RegisterInfo::getRegAllocationHints(
MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool ConsiderOnlyHints =
+ TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM);
+
// For predicated SVE instructions where the inactive lanes are undef,
// pick a destination register that is not unique to avoid introducing
// a movprfx.
const TargetRegisterClass *RegRC = MRI.getRegClass(VirtReg);
if (AArch64::ZPRRegClass.hasSubClassEq(RegRC)) {
- bool ConsiderOnlyHints = TargetRegisterInfo::getRegAllocationHints(
- VirtReg, Order, Hints, MF, VRM);
-
for (const MachineOperand &DefOp : MRI.def_operands(VirtReg)) {
const MachineInstr &Def = *DefOp.getParent();
if (DefOp.isImplicit() ||
@@ -1200,6 +1353,9 @@ bool AArch64RegisterInfo::getRegAllocationHints(
return ConsiderOnlyHints;
}
+ if (HandleMatchCmpPredicateHint(VirtReg, Order, Hints, VRM, MRI, ST, Matrix))
+ return ConsiderOnlyHints;
+
if (!ST.hasSME() || !ST.isStreaming())
return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
VRM);
diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
index 05d083a654cf6..778be79038a78 100644
--- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -183,8 +183,8 @@ define <vscale x 1 x i1> @lane_mask_nxv1i1_i32(i32 %index, i32 %TC) {
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uqadd z0.s, z0.s, z1.s
; CHECK-NEXT: mov z1.s, w1
-; CHECK-NEXT: cmphi p0.s, p0/z, z1.s, z0.s
-; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: cmphi p1.s, p0/z, z1.s, z0.s
+; CHECK-NEXT: punpklo p0.h, p1.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 %index, i32 %TC)
@@ -303,8 +303,8 @@ define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
; CHECK-STREAMING-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: orr z0.d, z0.d, z1.d
; CHECK-STREAMING-NEXT: mov z1.b, w1
-; CHECK-STREAMING-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
-; CHECK-STREAMING-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-STREAMING-NEXT: cmphi p1.b, p0/z, z1.b, z0.b
+; CHECK-STREAMING-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i8(i8 %index, i8 %TC)
ret <16 x i1> %active.lane.mask
@@ -329,8 +329,8 @@ define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) {
; CHECK-STREAMING-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: orr z0.d, z0.d, z1.d
; CHECK-STREAMING-NEXT: mov z1.b, w1
-; CHECK-STREAMING-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
-; CHECK-STREAMING-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-STREAMING-NEXT: cmphi p1.b, p0/z, z1.b, z0.b
+; CHECK-STREAMING-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i8(i8 %index, i8 %TC)
ret <8 x i1> %active.lane.mask
@@ -360,8 +360,8 @@ define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
; CHECK-STREAMING-NEXT: mov z1.h, w1
; CHECK-STREAMING-NEXT: umin z0.h, z0.h, #255
; CHECK-STREAMING-NEXT: and z1.h, z1.h, #0xff
-; CHECK-STREAMING-NEXT: cmphi p0.h, p0/z, z1.h, z0.h
-; CHECK-STREAMING-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-STREAMING-NEXT: cmphi p1.h, p0/z, z1.h, z0.h
+; CHECK-STREAMING-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i8(i8 %index, i8 %TC)
ret <4 x i1> %active.lane.mask
@@ -389,8 +389,8 @@ define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
; CHECK-STREAMING-NEXT: and w8, w1, #0xff
; CHECK-STREAMING-NEXT: mov z1.s, w8
; CHECK-STREAMING-NEXT: umin z0.s, z0.s, #255
-; CHECK-STREAMING-NEXT: cmphi p0.s, p0/z, z1.s, z0.s
-; CHECK-STREAMING-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-STREAMING-NEXT: cmphi p1.s, p0/z, z1.s, z0.s
+; CHECK-STREAMING-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i8(i8 %index, i8 %TC)
ret <2 x i1> %active.lane.mask
diff --git a/llvm/test/CodeGen/AArch64/combine-storetomstore.ll b/llvm/test/CodeGen/AArch64/combine-storetomstore.ll
index 1e4a695d1f4e8..d4244fedf7504 100644
--- a/llvm/test/CodeGen/AArch64/combine-storetomstore.ll
+++ b/llvm/test/CodeGen/AArch64/combine-storetomstore.ll
@@ -24,8 +24,8 @@ define void @test_masked_store_success_v4i16(<4 x i16> %x, ptr %ptr, <4 x i1> %m
; SVE-NEXT: shl v1.4h, v1.4h, #15
; SVE-NEXT: ptrue p0.h, vl4
; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT: cmpne p0.h, p0/z, z1.h, #0
-; SVE-NEXT: st1h { z0.h }, p0, [x0]
+; SVE-NEXT: cmpne p1.h, p0/z, z1.h, #0
+; SVE-NEXT: st1h { z0.h }, p1, [x0]
; SVE-NEXT: ret
%load = load <4 x i16>, ptr %ptr, align 32
%sel = select <4 x i1> %mask, <4 x i16> %x, <4 x i16> %load
@@ -40,8 +40,8 @@ define void @test_masked_store_success_v4i32(<4 x i32> %x, ptr %ptr, <4 x i1> %m
; SVE-NEXT: ptrue p0.s, vl4
; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
; SVE-NEXT: shl v1.4s, v1.4s, #31
-; SVE-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; SVE-NEXT: st1w { z0.s }, p0, [x0]
+; SVE-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; SVE-NEXT: st1w { z0.s }, p1, [x0]
; SVE-NEXT: ret
%load = load <4 x i32>, ptr %ptr, align 32
%sel = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %load
@@ -62,9 +62,9 @@ define void @test_masked_store_success_v4i64(<4 x i64> %x, ptr %ptr, <4 x i1> %m
; SVE-NEXT: shl v3.2d, v3.2d, #63
; SVE-NEXT: shl v2.2d, v2.2d, #63
; SVE-NEXT: cmpne p1.d, p0/z, z3.d, #0
-; SVE-NEXT: cmpne p0.d, p0/z, z2.d, #0
+; SVE-NEXT: cmpne p2.d, p0/z, z2.d, #0
; SVE-NEXT: st1d { z1.d }, p1, [x0, x8, lsl #3]
-; SVE-NEXT: st1d { z0.d }, p0, [x0]
+; SVE-NEXT: st1d { z0.d }, p2, [x0]
; SVE-NEXT: ret
%load = load <4 x i64>, ptr %ptr, align 32
%sel = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %load
@@ -78,8 +78,8 @@ define void @test_masked_store_success_v4f16(<4 x half> %x, ptr %ptr, <4 x i1> %
; SVE-NEXT: shl v1.4h, v1.4h, #15
; SVE-NEXT: ptrue p0.h, vl4
; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT: cmpne p0.h, p0/z, z1.h, #0
-; SVE-NEXT: st1h { z0.h }, p0, [x0]
+; SVE-NEXT: cmpne p1.h, p0/z, z1.h, #0
+; SVE-NEXT: st1h { z0.h }, p1, [x0]
; SVE-NEXT: ret
%load = load <4 x half>, ptr %ptr, align 32
%sel = select <4 x i1> %mask, <4 x half> %x, <4 x half> %load
@@ -94,8 +94,8 @@ define void @test_masked_store_success_v4f32(<4 x float> %x, ptr %ptr, <4 x i1>
; SVE-NEXT: ptrue p0.s, vl4
; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
; SVE-NEXT: shl v1.4s, v1.4s, #31
-; SVE-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; SVE-NEXT: st1w { z0.s }, p0, [x0]
+; SVE-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; SVE-NEXT: st1w { z0.s }, p1, [x0]
; SVE-NEXT: ret
%load = load <4 x float>, ptr %ptr, align 32
%sel = select <4 x i1> %mask, <4 x float> %x, <4 x float> %load
@@ -116,9 +116,9 @@ define void @test_masked_store_success_v4f64(<4 x double> %x, ptr %ptr, <4 x i1>
; SVE-NEXT: shl v3.2d, v3.2d, #63
; SVE-NEXT: shl v2.2d, v2.2d, #63
; SVE-NEXT: cmpne p1.d, p0/z, z3.d, #0
-; SVE-NEXT: cmpne p0.d, p0/z, z2.d, #0
+; SVE-NEXT: cmpne p2.d, p0/z, z2.d, #0
; SVE-NEXT: st1d { z1.d }, p1, [x0, x8, lsl #3]
-; SVE-NEXT: st1d { z0.d }, p0, [x0]
+; SVE-NEXT: st1d { z0.d }, p2, [x0]
; SVE-NEXT: ret
%load = load <4 x double>, ptr %ptr, align 32
%sel = select <4 x i1> %mask, <4 x double> %x, <4 x double> %load
@@ -132,8 +132,8 @@ define void @test_masked_store_success_v8i8(<8 x i8> %x, ptr %ptr, <8 x i1> %mas
; SVE-NEXT: shl v1.8b, v1.8b, #7
; SVE-NEXT: ptrue p0.b, vl8
; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT: cmpne p0.b, p0/z, z1.b, #0
-; SVE-NEXT: st1b { z0.b }, p0, [x0]
+; SVE-NEXT: cmpne p1.b, p0/z, z1.b, #0
+; SVE-NEXT: st1b { z0.b }, p1, [x0]
; SVE-NEXT: ret
%load = load <8 x i8>, ptr %ptr, align 32
%sel = select <8 x i1> %mask, <8 x i8> %x, <8 x i8> %load
@@ -148,8 +148,8 @@ define void @test_masked_store_success_v8i16(<8 x i16> %x, ptr %ptr, <8 x i1> %m
; SVE-NEXT: ptrue p0.h, vl8
; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
; SVE-NEXT: shl v1.8h, v1.8h, #15
-; SVE-NEXT: cmpne p0.h, p0/z, z1.h, #0
-; SVE-NEXT: st1h { z0.h }, p0, [x0]
+; SVE-NEXT: cmpne p1.h, p0/z, z1.h, #0
+; SVE-NEXT: st1h { z0.h }, p1, [x0]
; SVE-NEXT: ret
%load = load <8 x i16>, ptr %ptr, align 32
%sel = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %load
@@ -171,9 +171,9 @@ define void @test_masked_store_success_v8i32(<8 x i32> %x, ptr %ptr, <8 x i1> %m
; SVE-NEXT: shl v3.4s, v3.4s, #31
; SVE-NEXT: shl v2.4s, v2.4s, #31
; SVE-NEXT: cmpne p1.s, p0/z, z3.s, #0
-; SVE-NEXT: cmpne p0.s, p0/z, z2.s, #0
+; SVE-NEXT: cmpne p2.s, p0/z, z2.s, #0
; SVE-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2]
-; SVE-NEXT: st1w { z0.s }, p0, [x0]
+; SVE-NEXT: st1w { z0.s }, p2, [x0]
; SVE-NEXT: ret
%load = load <8 x i32>, ptr %ptr, align 32
%sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load
@@ -211,12 +211,12 @@ define void @test_masked_store_success_v8i64(<8 x i64> %x, ptr %ptr, <8 x i1> %m
; SVE-NEXT: shl v4.2d, v7.2d, #63
; SVE-NEXT: cmpne p2.d, p0/z, z5.d, #0
; SVE-NEXT: cmpne p3.d, p0/z, z6.d, #0
-; SVE-NEXT: cmpne p0.d, p0/z, z4.d, #0
+; SVE-NEXT: cmpne p4.d, p0/z, z4.d, #0
; SVE-NEXT: st1d { z2.d }, p1, [x0, x8, lsl #3]
; SVE-NEXT: mov x8, #2 // =0x2
; SVE-NEXT: st1d { z3.d }, p2, [x0, x9, lsl #3]
; SVE-NEXT: st1d { z1.d }, p3, [x0, x8, lsl #3]
-; SVE-NEXT: st1d { z0.d }, p0, [x0]
+; SVE-NEXT: st1d { z0.d }, p4, [x0]
; SVE-NEXT: ret
%load = load <8 x i64>, ptr %ptr, align 32
%sel = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %load
@@ -231,8 +231,8 @@ define void @test_masked_store_success_v8f16(<8 x half> %x, ptr %ptr, <8 x i1> %
; SVE-NEXT: ptrue p0.h, vl8
; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
; SVE-NEXT: shl v1.8h, v1.8h, #15
-; SVE-NEXT: cmpne p0.h, p0/z, z1.h, #0
-; SVE-NEXT: st1h { z0.h }, p0, [x0]
+; SVE-NEXT: cmpne p1.h, p0/z, z1.h, #0
+; SVE-NEXT: st1h { z0.h }, p1, [x0]
; SVE-NEXT: ret
%load = load <8 x half>, ptr %ptr, align 32
%sel = select <8 x i1> %mask, <8 x half> %x, <8 x half> %load
@@ -254,9 +254,9 @@ define void @test_masked_store_success_v8f32(<8 x float> %x, ptr %ptr, <8 x i1>
; SVE-NEXT: shl v3.4s, v3.4s, #31
; SVE-NEXT: shl v2.4s, v2.4s, #31
; SVE-NEXT: cmpne p1.s, p0/z, z3.s, #0
-; SVE-NEXT: cmpne p0.s, p0/z, z2.s, #0
+; SVE-NEXT: cmpne p2.s, p0/z, z2.s, #0
; SVE-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2]
-; SVE-NEXT: st1w { z0.s }, p0, [x0]
+; SVE-NEXT: st1w { z0.s }, p2, [x0]
; SVE-NEXT: ret
%load = load <8 x float>, ptr %ptr, align 32
%sel = select <8 x i1> %mask, <8 x float> %x, <8 x float> %load
@@ -294,12 +294,12 @@ define void @test_masked_store_success_v8f64(<8 x double> %x, ptr %ptr, <8 x i1>
; SVE-NEXT: shl v4.2d, v7.2d, #63
; SVE-NEXT: cmpne p2.d, p0/z, z5.d, #0
; SVE-NEXT: cmpne p3.d, p0/z, z6.d, #0
-; SVE-NEXT: cmpne p0.d, p0/z, z4.d, #0
+; SVE-NEXT: cmpne p4.d, p0/z, z4.d, #0
; SVE-NEXT: st1d { z2.d }, p1, [x0, x8, lsl #3]
; SVE-NEXT: mov x8, #2 // =0x2
; SVE-NEXT: st1d { z3.d }, p2, [x0, x9, lsl #3]
; SVE-NEXT: st1d { z1.d }, p3, [x0, x8, lsl #3]
-; SVE-NEXT: st1d { z0.d }, p0, [x0]
+; SVE-NEXT: st1d { z0.d }, p4, [x0]
; SVE-NEXT: ret
%load = load <8 x double>, ptr %ptr, align 32
%sel = select <8 x i1> %mask, <8 x double> %x, <8 x double> %load
@@ -313,8 +313,8 @@ define void @test_masked_store_success_v16i8(<16 x i8> %x, ptr %ptr, <16 x i1> %
; SVE-NEXT: shl v1.16b, v1.16b, #7
; SVE-NEXT: ptrue p0.b, vl16
; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT: cmpne p0.b, p0/z, z1.b, #0
-; SVE-NEXT: st1b { z0.b }, p0, [x0]
+; SVE-NEXT: cmpne p1.b, p0/z, z1.b, #0
+; SVE-NEXT: st1b { z0.b }, p1, [x0]
; SVE-NEXT: ret
%load = load <16 x i8>, ptr %ptr, align 32
%sel = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %load
@@ -334,9 +334,9 @@ define void @test_masked_store_success_v16i16(<16 x i16> %x, ptr %ptr, <16 x i1>
; SVE-NEXT: shl v3.8h, v3.8h, #15
; SVE-NEXT: shl v2.8h, v2.8h, #15
; SVE-NEXT: cmpne p1.h, p0/z, z3.h, #0
-; SVE-NEXT: cmpne p0.h, p0/z, z2.h, #0
+; SVE-NEXT: cmpne p2.h, p0/z, z2.h, #0
; SVE-NEXT: st1h { z1.h }, p1, [x0, x8, lsl #1]
-; SVE-NEXT: st1h { z0.h }, p0, [x0]
+; SVE-NEXT: st1h { z0.h }, p2, [x0]
; SVE-NEXT: ret
%load = load <16 x i16>, ptr %ptr, align 32
%sel = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %load
@@ -368,14 +368,14 @@ define void @test_masked_store_success_v16i32(<16 x i32> %x, ptr %ptr, <16 x i1>
; SVE-NEXT: cmpne p1.s, p0/z, z6.s, #0
; SVE-NEXT: shl v7.4s, v7.4s, #31
; SVE-NEXT: shl v5.4s, v5.4s, #31
+; SVE-NEXT: cmpne p4.s, p0/z, z4.s, #0
; SVE-NEXT: cmpne p2.s, p0/z, z7.s, #0
; SVE-NEXT: cmpne p3.s, p0/z, z5.s, #0
-; SVE-NEXT: cmpne p0.s, p0/z, z4.s, #0
; SVE-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2]
; SVE-NEXT: mov x8, #12 // =0xc
+; SVE-NEXT: st1w { z0.s }, p4, [x0]
; SVE-NEXT: st1w { z2.s }, p2, [x0, x9, lsl #2]
; SVE-NEXT: st1w { z3.s }, p3, [x0, x8, lsl #2]
-; SVE-NEXT: st1w { z0.s }, p0, [x0]
; SVE-NEXT: ret
%load = load <16 x i32>, ptr %ptr, align 32
%sel = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %load
@@ -449,10 +449,10 @@ define void @test_masked_store_success_v32i8(<32 x i8> %x, ptr %ptr, <32 x i1> %
; SVE-NEXT: mov w8, #16 // =0x10
; SVE-NEXT: shl v2.16b, v2.16b, #7
; SVE-NEXT: shl v3.16b, v3.16b, #7
+; SVE-NEXT: cmpne p2.b, p0/z, z2.b, #0
; SVE-NEXT: cmpne p1.b, p0/z, z3.b, #0
-; SVE-NEXT: cmpne p0.b, p0/z, z2.b, #0
+; SVE-NEXT: st1b { z0.b }, p2, [x0]
; SVE-NEXT: st1b { z1.b }, p1, [x0, x8]
-; SVE-NEXT: st1b { z0.b }, p0, [x0]
; SVE-NEXT: ret
%load = load <32 x i8>, ptr %ptr, align 32
%sel = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %load
@@ -537,13 +537,13 @@ define void @test_masked_store_success_v32i16(<32 x i16> %x, ptr %ptr, <32 x i1>
; SVE-NEXT: shl v4.8h, v7.8h, #15
; SVE-NEXT: cmpne p2.h, p0/z, z5.h, #0
; SVE-NEXT: cmpne p3.h, p0/z, z6.h, #0
-; SVE-NEXT: cmpne p0.h, p0/z, z4.h, #0
+; SVE-NEXT: cmpne p4.h, p0/z, z4.h, #0
; SVE-NEXT: st1h { z2.h }, p1, [x0, x8, lsl #1]
; SVE-NEXT: mov x8, #24 // =0x18
; SVE-NEXT: st1h { z3.h }, p2, [x0, x8, lsl #1]
; SVE-NEXT: mov x8, #8 // =0x8
; SVE-NEXT: st1h { z1.h }, p3, [x0, x8, lsl #1]
-; SVE-NEXT: st1h { z0.h }, p0, [x0]
+; SVE-NEXT: st1h { z0.h }, p4, [x0]
; SVE-NEXT: ret
%load = load <32 x i16>, ptr %ptr, align 32
%sel = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %load
@@ -692,12 +692,12 @@ define void @test_masked_store_success_v64i8(<64 x i8> %x, ptr %ptr, <64 x i1> %
; SVE-NEXT: shl v5.16b, v6.16b, #7
; SVE-NEXT: cmpne p2.b, p0/z, z4.b, #0
; SVE-NEXT: cmpne p3.b, p0/z, z7.b, #0
-; SVE-NEXT: cmpne p0.b, p0/z, z5.b, #0
+; SVE-NEXT: cmpne p4.b, p0/z, z5.b, #0
; SVE-NEXT: st1b { z2.b }, p1, [x0, x8]
; SVE-NEXT: mov w8, #16 // =0x10
; SVE-NEXT: st1b { z3.b }, p2, [x0, x9]
; SVE-NEXT: st1b { z1.b }, p3, [x0, x8]
-; SVE-NEXT: st1b { z0.b }, p0, [x0]
+; SVE-NEXT: st1b { z0.b }, p4, [x0]
; SVE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; SVE-NEXT: ret
%load = load <64 x i8>, ptr %ptr, align 32
@@ -715,8 +715,8 @@ define void @test_masked_store_success_invert_mask_v4i32(<4 x i32> %x, ptr %ptr,
; SVE-NEXT: eor v1.8b, v1.8b, v2.8b
; SVE-NEXT: ushll v1.4s, v1.4h, #0
; SVE-NEXT: shl v1.4s, v1.4s, #31
-; SVE-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; SVE-NEXT: st1w { z0.s }, p0, [x0]
+; SVE-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; SVE-NEXT: st1w { z0.s }, p1, [x0]
; SVE-NEXT: ret
%load = load <4 x i32>, ptr %ptr, align 32
%sel = select <4 x i1> %mask, <4 x i32> %load, <4 x i32> %x
@@ -738,9 +738,9 @@ define void @test_masked_store_success_invert_mask_v8i32(<8 x i32> %x, ptr %ptr,
; SVE-NEXT: shl v3.4s, v3.4s, #31
; SVE-NEXT: shl v2.4s, v2.4s, #31
; SVE-NEXT: cmpge p1.s, p0/z, z3.s, #0
-; SVE-NEXT: cmpge p0.s, p0/z, z2.s, #0
+; SVE-NEXT: cmpge p2.s, p0/z, z2.s, #0
; SVE-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2]
-; SVE-NEXT: st1w { z0.s }, p0, [x0]
+; SVE-NEXT: st1w { z0.s }, p2, [x0]
; SVE-NEXT: ret
%load = load <8 x i32>, ptr %ptr, align 32
%sel = select <8 x i1> %mask, <8 x i32> %load, <8 x i32> %x
@@ -771,15 +771,15 @@ define void @test_masked_store_success_invert_mask_v16i32(<16 x i32> %x, ptr %pt
; SVE-NEXT: cmpge p1.s, p0/z, z6.s, #0
; SVE-NEXT: shl v7.4s, v7.4s, #31
; SVE-NEXT: shl v5.4s, v5.4s, #31
+; SVE-NEXT: cmpge p4.s, p0/z, z4.s, #0
; SVE-NEXT: cmpge p2.s, p0/z, z7.s, #0
; SVE-NEXT: cmpge p3.s, p0/z, z5.s, #0
-; SVE-NEXT: cmpge p0.s, p0/z, z4.s, #0
; SVE-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2]
; SVE-NEXT: mov x8, #8 // =0x8
+; SVE-NEXT: st1w { z0.s }, p4, [x0]
; SVE-NEXT: st1w { z2.s }, p2, [x0, x8, lsl #2]
; SVE-NEXT: mov x8, #12 // =0xc
; SVE-NEXT: st1w { z3.s }, p3, [x0, x8, lsl #2]
-; SVE-NEXT: st1w { z0.s }, p0, [x0]
; SVE-NEXT: ret
%load = load <16 x i32>, ptr %ptr, align 32
%sel = select <16 x i1> %mask, <16 x i32> %load, <16 x i32> %x
@@ -925,11 +925,11 @@ define void @test_masked_store_multiple_v8i32(<8 x i32> %x, <8 x i32> %y, ptr %p
; SVE-NEXT: cmpne p1.s, p0/z, z7.s, #0
; SVE-NEXT: cmlt v5.4s, v5.4s, #0
; SVE-NEXT: ldp q7, q16, [x1]
-; SVE-NEXT: cmpne p0.s, p0/z, z4.s, #0
+; SVE-NEXT: cmpne p2.s, p0/z, z4.s, #0
; SVE-NEXT: bif v2.16b, v7.16b, v6.16b
; SVE-NEXT: bif v3.16b, v16.16b, v5.16b
; SVE-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2]
-; SVE-NEXT: st1w { z0.s }, p0, [x0]
+; SVE-NEXT: st1w { z0.s }, p2, [x0]
; SVE-NEXT: stp q2, q3, [x1]
; SVE-NEXT: ret
%load = load <8 x i32>, ptr %ptr1, align 32
@@ -989,7 +989,7 @@ define void @test_masked_store_multiple_v8i64(<8 x i64> %x, <8 x i64> %y, ptr %p
; SVE-NEXT: cmlt v17.2d, v18.2d, #0
; SVE-NEXT: shl v18.2d, v22.2d, #63
; SVE-NEXT: ldp q19, q22, [x1, #32]
-; SVE-NEXT: cmpne p1.d, p1/z, z16.d, #0
+; SVE-NEXT: cmpne p4.d, p1/z, z16.d, #0
; SVE-NEXT: cmlt v16.2d, v20.2d, #0
; SVE-NEXT: cmlt v20.2d, v21.2d, #0
; SVE-NEXT: cmlt v18.2d, v18.2d, #0
@@ -1000,7 +1000,7 @@ define void @test_masked_store_multiple_v8i64(<8 x i64> %x, <8 x i64> %y, ptr %p
; SVE-NEXT: st1d { z3.d }, p3, [x0, x8, lsl #3]
; SVE-NEXT: mov v2.16b, v16.16b
; SVE-NEXT: mov v3.16b, v20.16b
-; SVE-NEXT: st1d { z1.d }, p1, [x0, x9, lsl #3]
+; SVE-NEXT: st1d { z1.d }, p4, [x0, x9, lsl #3]
; SVE-NEXT: mov v1.16b, v18.16b
; SVE-NEXT: st1d { z0.d }, p0, [x0]
; SVE-NEXT: bsl v2.16b, v7.16b, v22.16b
@@ -1026,8 +1026,8 @@ define void @test_masked_store_unaligned_v4i32(<4 x i32> %data, ptr %ptr, <4 x i
; SVE-NEXT: add x8, x0, #1
; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
; SVE-NEXT: shl v1.4s, v1.4s, #31
-; SVE-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; SVE-NEXT: st1w { z0.s }, p0, [x8]
+; SVE-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; SVE-NEXT: st1w { z0.s }, p1, [x8]
; SVE-NEXT: ret
%ptr_i8 = getelementptr i8, ptr %ptr, i32 1
%ptr_vec = bitcast ptr %ptr_i8 to ptr
@@ -1051,9 +1051,9 @@ define void @test_masked_store_unaligned_v4i64(<4 x i64> %data, ptr %ptr, <4 x i
; SVE-NEXT: shl v3.2d, v3.2d, #63
; SVE-NEXT: shl v2.2d, v2.2d, #63
; SVE-NEXT: cmpne p1.d, p0/z, z3.d, #0
-; SVE-NEXT: cmpne p0.d, p0/z, z2.d, #0
+; SVE-NEXT: cmpne p2.d, p0/z, z2.d, #0
; SVE-NEXT: st1d { z1.d }, p1, [x8]
-; SVE-NEXT: st1d { z0.d }, p0, [x9]
+; SVE-NEXT: st1d { z0.d }, p2, [x9]
; SVE-NEXT: ret
%ptr_i8 = getelementptr i8, ptr %ptr, i64 1
%ptr_vec = bitcast ptr %ptr_i8 to ptr
@@ -1078,9 +1078,9 @@ define void @test_masked_store_unaligned_v8i32(<8 x i32> %data, ptr %ptr, <8 x i
; SVE-NEXT: shl v3.4s, v3.4s, #31
; SVE-NEXT: shl v2.4s, v2.4s, #31
; SVE-NEXT: cmpne p1.s, p0/z, z3.s, #0
-; SVE-NEXT: cmpne p0.s, p0/z, z2.s, #0
+; SVE-NEXT: cmpne p2.s, p0/z, z2.s, #0
; SVE-NEXT: st1w { z0.s }, p1, [x8]
-; SVE-NEXT: st1w { z1.s }, p0, [x9]
+; SVE-NEXT: st1w { z1.s }, p2, [x9]
; SVE-NEXT: ret
%ptr_i8 = getelementptr i8, ptr %ptr, i32 1
%ptr_vec = bitcast ptr %ptr_i8 to ptr
@@ -1120,13 +1120,13 @@ define void @test_masked_store_unaligned_v8i64(<8 x i64> %data, ptr %ptr, <8 x i
; SVE-NEXT: shl v4.2d, v7.2d, #63
; SVE-NEXT: cmpne p2.d, p0/z, z5.d, #0
; SVE-NEXT: cmpne p3.d, p0/z, z6.d, #0
-; SVE-NEXT: cmpne p0.d, p0/z, z4.d, #0
+; SVE-NEXT: cmpne p4.d, p0/z, z4.d, #0
; SVE-NEXT: st1d { z2.d }, p1, [x8]
; SVE-NEXT: add x8, x0, #1
; SVE-NEXT: st1d { z3.d }, p2, [x9]
; SVE-NEXT: add x9, x0, #17
; SVE-NEXT: st1d { z0.d }, p3, [x8]
-; SVE-NEXT: st1d { z1.d }, p0, [x9]
+; SVE-NEXT: st1d { z1.d }, p4, [x9]
; SVE-NEXT: ret
%ptr_i8 = getelementptr i8, ptr %ptr, i64 1
%ptr_vec = bitcast ptr %ptr_i8 to ptr
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
index fc672dfa84edd..807172139faff 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
@@ -225,22 +225,22 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
; CHECK-NEXT: mov z6.d, z0.d
; CHECK-NEXT: mov z7.d, z1.d
; CHECK-NEXT: add x8, x8, x10
-; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
-; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
-; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
-; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
-; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
-; CHECK-NEXT: ld1d { z3.d }, p1/z, [x0]
-; CHECK-NEXT: ld1d { z5.d }, p1/z, [x1]
+; CHECK-NEXT: cmpne p2.d, p1/z, z2.d, #0
+; CHECK-NEXT: zip2 p1.d, p2.d, p2.d
+; CHECK-NEXT: zip1 p2.d, p2.d, p2.d
+; CHECK-NEXT: ld1d { z2.d }, p1/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1d { z4.d }, p1/z, [x1, #1, mul vl]
+; CHECK-NEXT: ld1d { z3.d }, p2/z, [x0]
+; CHECK-NEXT: ld1d { z5.d }, p2/z, [x1]
; CHECK-NEXT: add x1, x1, x11
; CHECK-NEXT: add x0, x0, x11
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
-; CHECK-NEXT: mov z1.d, p2/m, z7.d
-; CHECK-NEXT: mov z0.d, p1/m, z6.d
+; CHECK-NEXT: mov z1.d, p1/m, z7.d
; CHECK-NEXT: whilelo p1.d, x8, x9
+; CHECK-NEXT: mov z0.d, p2/m, z6.d
; CHECK-NEXT: b.mi .LBB2_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
diff --git a/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll
index c0c31427307b5..ae3a0e2ac1b6b 100644
--- a/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll
+++ b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll
@@ -10,8 +10,8 @@ define fastcc i8 @allocno_reload_assign(ptr %p) {
; CHECK: // %bb.0:
; CHECK-NEXT: movi d0, #0000000000000000
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: uzp1 p0.s, p0.s, p0.s
+; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, #0
+; CHECK-NEXT: uzp1 p0.s, p1.s, p0.s
; CHECK-NEXT: uzp1 p0.h, p0.h, p0.h
; CHECK-NEXT: uzp1 p8.b, p0.b, p0.b
; CHECK-NEXT: mov z0.b, p8/z, #1 // =0x1
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll
index 7705d8949ca1e..d72be50158a8f 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll
@@ -22,8 +22,8 @@ define <vscale x 2 x i64> @insert_vscale_2_i64_zero(<vscale x 2 x i64> %vec, i64
; CHECK-GI-NEXT: index z1.d, #0, #1
; CHECK-GI-NEXT: ptrue p0.d
; CHECK-GI-NEXT: mov z2.d, x8
-; CHECK-GI-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
-; CHECK-GI-NEXT: mov z0.d, p0/m, x0
+; CHECK-GI-NEXT: cmpeq p1.d, p0/z, z1.d, z2.d
+; CHECK-GI-NEXT: mov z0.d, p1/m, x0
; CHECK-GI-NEXT: ret
entry:
%d = insertelement <vscale x 2 x i64> %vec, i64 %elt, i64 0
@@ -36,8 +36,8 @@ define <vscale x 2 x i64> @insert_vscale_2_i64(<vscale x 2 x i64> %vec, i64 %elt
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: mov z2.d, x1
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
-; CHECK-NEXT: mov z0.d, p0/m, x0
+; CHECK-NEXT: cmpeq p1.d, p0/z, z1.d, z2.d
+; CHECK-NEXT: mov z0.d, p1/m, x0
; CHECK-NEXT: ret
entry:
%d = insertelement <vscale x 2 x i64> %vec, i64 %elt, i64 %idx
@@ -57,8 +57,8 @@ define <vscale x 4 x i32> @insert_vscale_4_i32_zero(<vscale x 4 x i32> %vec, i32
; CHECK-GI-NEXT: index z1.s, #0, #1
; CHECK-GI-NEXT: ptrue p0.s
; CHECK-GI-NEXT: mov z2.s, w8
-; CHECK-GI-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
-; CHECK-GI-NEXT: mov z0.s, p0/m, w0
+; CHECK-GI-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
+; CHECK-GI-NEXT: mov z0.s, p1/m, w0
; CHECK-GI-NEXT: ret
entry:
%d = insertelement <vscale x 4 x i32> %vec, i32 %elt, i64 0
@@ -71,8 +71,8 @@ define <vscale x 4 x i32> @insert_vscale_4_i32(<vscale x 4 x i32> %vec, i32 %elt
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w1
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
-; CHECK-NEXT: mov z0.s, p0/m, w0
+; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: mov z0.s, p1/m, w0
; CHECK-NEXT: ret
entry:
%d = insertelement <vscale x 4 x i32> %vec, i32 %elt, i64 %idx
@@ -96,8 +96,8 @@ define <vscale x 8 x i16> @insert_vscale_8_i16(<vscale x 8 x i16> %vec, i16 %elt
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w1
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
-; CHECK-NEXT: mov z0.h, p0/m, w0
+; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
+; CHECK-NEXT: mov z0.h, p1/m, w0
; CHECK-NEXT: ret
entry:
%d = insertelement <vscale x 8 x i16> %vec, i16 %elt, i64 %idx
@@ -121,8 +121,8 @@ define <vscale x 16 x i8> @insert_vscale_16_i8(<vscale x 16 x i8> %vec, i8 %elt,
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w1
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
-; CHECK-NEXT: mov z0.b, p0/m, w0
+; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
+; CHECK-NEXT: mov z0.b, p1/m, w0
; CHECK-NEXT: ret
entry:
%d = insertelement <vscale x 16 x i8> %vec, i8 %elt, i64 %idx
diff --git a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
index 38ea26a4fb287..5f275772c6678 100644
--- a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
+++ b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
@@ -240,9 +240,9 @@ define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %a) {
define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ctz_and_nxv16i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: brkb p0.b, p1/z, p0.b
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: brkb p0.b, p0/z, p1.b
; CHECK-NEXT: cntp x0, p0, p0.b
; CHECK-NEXT: ret
%cmp = icmp ne <vscale x 16 x i8> %a, %b
diff --git a/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll b/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll
index f4fdd52f117d1..0bf9e0de3055b 100644
--- a/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll
+++ b/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll
@@ -21,9 +21,9 @@ define <vscale x 16 x i1> @match_nxv16i8_v2i8(<vscale x 16 x i8> %op1, <2 x i8>
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: mov z1.b, w8
+; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z2.b
; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z1.b
-; CHECK-NEXT: cmpeq p1.b, p1/z, z0.b, z2.b
-; CHECK-NEXT: sel p1.b, p1, p1.b, p2.b
+; CHECK-NEXT: sel p1.b, p3, p3.b, p2.b
; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
; CHECK-NEXT: ret
%r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <2 x i8> %op2, <vscale x 16 x i1> %mask)
@@ -142,8 +142,8 @@ define <16 x i1> @match_v16i8_v8i8(<16 x i8> %op1, <8 x i8> %op2, <16 x i1> %mas
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: mov z1.d, d1
-; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmpne p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: match p0.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -158,8 +158,8 @@ define <16 x i1> @match_v16i8_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %m
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmpne p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: match p0.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -175,8 +175,8 @@ define <8 x i1> @match_v8i8_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) #
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: mov z1.d, d1
-; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmpne p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: match p0.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -203,8 +203,8 @@ define <8 x i1> @match_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) #0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: shl v2.8h, v2.8h, #15
-; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: match p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: cmpne p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: match p0.h, p1/z, z0.h, z1.h
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
@@ -221,8 +221,8 @@ define <8 x i1> @match_v8i8_v16i8(<8 x i8> %op1, <16 x i8> %op2, <8 x i1> %mask)
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmpne p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: match p0.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -490,8 +490,8 @@ define <vscale x 2 x i1> @match_nxv2xi64_v2i64(<vscale x 2 x i64> %op1, <2 x i64
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z1.d, d1
; CHECK-NEXT: cmpeq p2.d, p1/z, z0.d, z2.d
-; CHECK-NEXT: cmpeq p1.d, p1/z, z0.d, z1.d
-; CHECK-NEXT: sel p1.b, p1, p1.b, p2.b
+; CHECK-NEXT: cmpeq p3.d, p1/z, z0.d, z1.d
+; CHECK-NEXT: sel p1.b, p3, p3.b, p2.b
; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
; CHECK-NEXT: ret
%r = tail call <vscale x 2 x i1> @llvm.experimental.vector.match(<vscale x 2 x i64> %op1, <2 x i64> %op2, <vscale x 2 x i1> %mask)
diff --git a/llvm/test/CodeGen/AArch64/rcpc3-sve.ll b/llvm/test/CodeGen/AArch64/rcpc3-sve.ll
index d4c80d8eea198..78935ec66a902 100644
--- a/llvm/test/CodeGen/AArch64/rcpc3-sve.ll
+++ b/llvm/test/CodeGen/AArch64/rcpc3-sve.ll
@@ -25,8 +25,8 @@ define hidden <vscale x 2 x i64> @test_load_sve_lane1(ptr nocapture noundef read
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: ldapr x8, [x0]
-; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
-; CHECK-NEXT: mov z0.d, p0/m, x8
+; CHECK-NEXT: cmpeq p1.d, p0/z, z1.d, z2.d
+; CHECK-NEXT: mov z0.d, p1/m, x8
; CHECK-NEXT: ret
%1 = load atomic i64, ptr %a acquire, align 8
%vldap1_lane = insertelement <vscale x 2 x i64> %b, i64 %1, i64 1
diff --git a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
index 06cabf3631a13..3bbcaca7468b1 100644
--- a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
+++ b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
@@ -842,8 +842,8 @@ define void @dont_coalesce_arg_v8i1(<8 x i1> %arg, ptr %ptr) #0 {
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Spill
; CHECK-NEXT: and z1.b, z1.b, #0x1
-; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0
-; CHECK-NEXT: str p0, [x8, #7, mul vl] // 2-byte Spill
+; CHECK-NEXT: cmpne p1.b, p0/z, z1.b, #0
+; CHECK-NEXT: str p1, [x8, #7, mul vl] // 2-byte Spill
; CHECK-NEXT: smstop sm
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Reload
; CHECK-NEXT: bl use_v8i1
diff --git a/llvm/test/CodeGen/AArch64/sve-cmp-select.ll b/llvm/test/CodeGen/AArch64/sve-cmp-select.ll
index 945634259812d..1fa64887c5246 100644
--- a/llvm/test/CodeGen/AArch64/sve-cmp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-cmp-select.ll
@@ -5,8 +5,8 @@ define <vscale x 16 x i8> @vselect_cmp_ne(<vscale x 16 x i8> %a, <vscale x 16 x
; CHECK-LABEL: vselect_cmp_ne:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: sel z0.b, p0, z1.b, z2.b
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: sel z0.b, p1, z1.b, z2.b
; CHECK-NEXT: ret
%cmp = icmp ne <vscale x 16 x i8> %a, %b
%d = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c
@@ -17,8 +17,8 @@ define <vscale x 16 x i8> @vselect_cmp_sgt(<vscale x 16 x i8> %a, <vscale x 16 x
; CHECK-LABEL: vselect_cmp_sgt:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: sel z0.b, p0, z1.b, z2.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: sel z0.b, p1, z1.b, z2.b
; CHECK-NEXT: ret
%cmp = icmp sgt <vscale x 16 x i8> %a, %b
%d = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c
@@ -29,8 +29,8 @@ define <vscale x 16 x i8> @vselect_cmp_ugt(<vscale x 16 x i8> %a, <vscale x 16 x
; CHECK-LABEL: vselect_cmp_ugt:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmphi p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: sel z0.b, p0, z1.b, z2.b
+; CHECK-NEXT: cmphi p1.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: sel z0.b, p1, z1.b, z2.b
; CHECK-NEXT: ret
%cmp = icmp ugt <vscale x 16 x i8> %a, %b
%d = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c
diff --git a/llvm/test/CodeGen/AArch64/sve-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-fcvt.ll
index 743623b86f1b0..6f9fbc401224c 100644
--- a/llvm/test/CodeGen/AArch64/sve-fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fcvt.ll
@@ -175,8 +175,8 @@ define <vscale x 4 x i1> @fcvtzs_nxv4f64_to_nxv4i1(<vscale x 4 x double> %a) {
; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: uzp1 p0.s, p0.s, p1.s
+; CHECK-NEXT: cmpne p2.d, p0/z, z0.d, #0
+; CHECK-NEXT: uzp1 p0.s, p2.s, p1.s
; CHECK-NEXT: ret
%res = fptosi <vscale x 4 x double> %a to <vscale x 4 x i1>
ret <vscale x 4 x i1> %res
@@ -200,8 +200,8 @@ define <vscale x 8 x i1> @fcvtzs_nxv8f32_to_nxv8i1(<vscale x 8 x float> %a) {
; CHECK-NEXT: fcvtzs z1.s, p0/m, z1.s
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h
+; CHECK-NEXT: cmpne p2.s, p0/z, z0.s, #0
+; CHECK-NEXT: uzp1 p0.h, p2.h, p1.h
; CHECK-NEXT: ret
%res = fptosi <vscale x 8 x float> %a to <vscale x 8 x i1>
ret <vscale x 8 x i1> %res
@@ -453,8 +453,8 @@ define <vscale x 4 x i1> @fcvtzu_nxv4f64_to_nxv4i1(<vscale x 4 x double> %a) {
; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.d
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: uzp1 p0.s, p0.s, p1.s
+; CHECK-NEXT: cmpne p2.d, p0/z, z0.d, #0
+; CHECK-NEXT: uzp1 p0.s, p2.s, p1.s
; CHECK-NEXT: ret
%res = fptoui <vscale x 4 x double> %a to <vscale x 4 x i1>
ret <vscale x 4 x i1> %res
@@ -478,8 +478,8 @@ define <vscale x 8 x i1> @fcvtzu_nxv8f32_to_nxv8i1(<vscale x 8 x float> %a) {
; CHECK-NEXT: fcvtzu z1.s, p0/m, z1.s
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h
+; CHECK-NEXT: cmpne p2.s, p0/z, z0.s, #0
+; CHECK-NEXT: uzp1 p0.h, p2.h, p1.h
; CHECK-NEXT: ret
%res = fptoui <vscale x 8 x float> %a to <vscale x 8 x i1>
ret <vscale x 8 x i1> %res
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
index d1e9dc13f50e8..873224da3909f 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
@@ -38,10 +38,10 @@ define void @select_v16f16(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ptrue p1.h, vl16
; CHECK-NEXT: and z0.h, z0.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpne p2.h, p0/z, z0.h, #0
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p1/z, [x1]
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: sel z0.h, p2, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <16 x half>, ptr %a
@@ -59,13 +59,13 @@ define void @select_v32f16(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ptrue p1.h, vl16
; VBITS_GE_256-NEXT: and z0.h, z0.h, #0x1
-; VBITS_GE_256-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; VBITS_GE_256-NEXT: cmpne p2.h, p0/z, z0.h, #0
; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p1/z, [x0]
; VBITS_GE_256-NEXT: ld1h { z2.h }, p1/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z3.h }, p1/z, [x1]
-; VBITS_GE_256-NEXT: sel z0.h, p0, z0.h, z2.h
-; VBITS_GE_256-NEXT: sel z1.h, p0, z1.h, z3.h
+; VBITS_GE_256-NEXT: sel z0.h, p2, z0.h, z2.h
+; VBITS_GE_256-NEXT: sel z1.h, p2, z1.h, z3.h
; VBITS_GE_256-NEXT: st1h { z0.h }, p1, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p1, [x0]
; VBITS_GE_256-NEXT: ret
@@ -76,10 +76,10 @@ define void @select_v32f16(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_512-NEXT: ptrue p0.h
; VBITS_GE_512-NEXT: ptrue p1.h, vl32
; VBITS_GE_512-NEXT: and z0.h, z0.h, #0x1
-; VBITS_GE_512-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; VBITS_GE_512-NEXT: cmpne p2.h, p0/z, z0.h, #0
; VBITS_GE_512-NEXT: ld1h { z0.h }, p1/z, [x0]
; VBITS_GE_512-NEXT: ld1h { z1.h }, p1/z, [x1]
-; VBITS_GE_512-NEXT: sel z0.h, p0, z0.h, z1.h
+; VBITS_GE_512-NEXT: sel z0.h, p2, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p1, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load volatile <32 x half>, ptr %a
@@ -96,10 +96,10 @@ define void @select_v64f16(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ptrue p1.h, vl64
; CHECK-NEXT: and z0.h, z0.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpne p2.h, p0/z, z0.h, #0
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p1/z, [x1]
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: sel z0.h, p2, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <64 x half>, ptr %a
@@ -116,10 +116,10 @@ define void @select_v128f16(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ptrue p1.h, vl128
; CHECK-NEXT: and z0.h, z0.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpne p2.h, p0/z, z0.h, #0
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p1/z, [x1]
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: sel z0.h, p2, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <128 x half>, ptr %a
@@ -162,10 +162,10 @@ define void @select_v8f32(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z0.s, w8
; CHECK-NEXT: ptrue p1.s, vl8
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpne p2.s, p0/z, z0.s, #0
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1]
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: sel z0.s, p2, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <8 x float>, ptr %a
@@ -183,13 +183,13 @@ define void @select_v16f32(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-NEXT: mov z0.s, w8
; VBITS_GE_256-NEXT: ptrue p1.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
-; VBITS_GE_256-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; VBITS_GE_256-NEXT: cmpne p2.s, p0/z, z0.s, #0
; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p1/z, [x0]
; VBITS_GE_256-NEXT: ld1w { z2.s }, p1/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z3.s }, p1/z, [x1]
-; VBITS_GE_256-NEXT: sel z0.s, p0, z0.s, z2.s
-; VBITS_GE_256-NEXT: sel z1.s, p0, z1.s, z3.s
+; VBITS_GE_256-NEXT: sel z0.s, p2, z0.s, z2.s
+; VBITS_GE_256-NEXT: sel z1.s, p2, z1.s, z3.s
; VBITS_GE_256-NEXT: st1w { z0.s }, p1, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p1, [x0]
; VBITS_GE_256-NEXT: ret
@@ -200,10 +200,10 @@ define void @select_v16f32(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_512-NEXT: ptrue p0.s
; VBITS_GE_512-NEXT: mov z0.s, w8
; VBITS_GE_512-NEXT: ptrue p1.s, vl16
-; VBITS_GE_512-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; VBITS_GE_512-NEXT: cmpne p2.s, p0/z, z0.s, #0
; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT: ld1w { z1.s }, p1/z, [x1]
-; VBITS_GE_512-NEXT: sel z0.s, p0, z0.s, z1.s
+; VBITS_GE_512-NEXT: sel z0.s, p2, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p1, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load volatile <16 x float>, ptr %a
@@ -220,10 +220,10 @@ define void @select_v32f32(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z0.s, w8
; CHECK-NEXT: ptrue p1.s, vl32
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpne p2.s, p0/z, z0.s, #0
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1]
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: sel z0.s, p2, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <32 x float>, ptr %a
@@ -240,10 +240,10 @@ define void @select_v64f32(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z0.s, w8
; CHECK-NEXT: ptrue p1.s, vl64
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpne p2.s, p0/z, z0.s, #0
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1]
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: sel z0.s, p2, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <64 x float>, ptr %a
@@ -287,10 +287,10 @@ define void @select_v4f64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: ptrue p1.d, vl4
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: cmpne p2.d, p0/z, z0.d, #0
; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: sel z0.d, p2, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <4 x double>, ptr %a
@@ -309,13 +309,13 @@ define void @select_v8f64(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-NEXT: mov z0.d, x8
; VBITS_GE_256-NEXT: ptrue p1.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
-; VBITS_GE_256-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; VBITS_GE_256-NEXT: cmpne p2.d, p0/z, z0.d, #0
; VBITS_GE_256-NEXT: ld1d { z0.d }, p1/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p1/z, [x0]
; VBITS_GE_256-NEXT: ld1d { z2.d }, p1/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z3.d }, p1/z, [x1]
-; VBITS_GE_256-NEXT: sel z0.d, p0, z0.d, z2.d
-; VBITS_GE_256-NEXT: sel z1.d, p0, z1.d, z3.d
+; VBITS_GE_256-NEXT: sel z0.d, p2, z0.d, z2.d
+; VBITS_GE_256-NEXT: sel z1.d, p2, z1.d, z3.d
; VBITS_GE_256-NEXT: st1d { z0.d }, p1, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p1, [x0]
; VBITS_GE_256-NEXT: ret
@@ -327,10 +327,10 @@ define void @select_v8f64(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_512-NEXT: ptrue p0.d
; VBITS_GE_512-NEXT: mov z0.d, x8
; VBITS_GE_512-NEXT: ptrue p1.d, vl8
-; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; VBITS_GE_512-NEXT: cmpne p2.d, p0/z, z0.d, #0
; VBITS_GE_512-NEXT: ld1d { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: ld1d { z1.d }, p1/z, [x1]
-; VBITS_GE_512-NEXT: sel z0.d, p0, z0.d, z1.d
+; VBITS_GE_512-NEXT: sel z0.d, p2, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p1, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load volatile <8 x double>, ptr %a
@@ -348,10 +348,10 @@ define void @select_v16f64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: ptrue p1.d, vl16
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: cmpne p2.d, p0/z, z0.d, #0
; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: sel z0.d, p2, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <16 x double>, ptr %a
@@ -369,10 +369,10 @@ define void @select_v32f64(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: ptrue p1.d, vl32
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: cmpne p2.d, p0/z, z0.d, #0
; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: sel z0.d, p2, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <32 x double>, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
index 6f4d257039bca..ca2a652902d3f 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
@@ -41,10 +41,10 @@ define void @insertelement_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: ptrue p1.h, vl16
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: cmpeq p2.h, p0/z, z0.h, z1.h
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: fmov h1, #5.00000000
-; CHECK-NEXT: mov z0.h, p0/m, h1
+; CHECK-NEXT: mov z0.h, p2/m, h1
; CHECK-NEXT: st1h { z0.h }, p1, [x1]
; CHECK-NEXT: ret
%op1 = load <16 x half>, ptr %a
@@ -62,10 +62,10 @@ define void @insertelement_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: mov z1.h, w8
; VBITS_GE_256-NEXT: ptrue p1.h, vl16
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
-; VBITS_GE_256-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
+; VBITS_GE_256-NEXT: cmpeq p2.h, p0/z, z0.h, z1.h
; VBITS_GE_256-NEXT: fmov h0, #5.00000000
; VBITS_GE_256-NEXT: ld1h { z1.h }, p1/z, [x0, x8, lsl #1]
-; VBITS_GE_256-NEXT: mov z1.h, p0/m, h0
+; VBITS_GE_256-NEXT: mov z1.h, p2/m, h0
; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x0]
; VBITS_GE_256-NEXT: st1h { z1.h }, p1, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z0.h }, p1, [x1]
@@ -78,10 +78,10 @@ define void @insertelement_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_512-NEXT: ptrue p0.h
; VBITS_GE_512-NEXT: mov z1.h, w8
; VBITS_GE_512-NEXT: ptrue p1.h, vl32
-; VBITS_GE_512-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
+; VBITS_GE_512-NEXT: cmpeq p2.h, p0/z, z0.h, z1.h
; VBITS_GE_512-NEXT: ld1h { z0.h }, p1/z, [x0]
; VBITS_GE_512-NEXT: fmov h1, #5.00000000
-; VBITS_GE_512-NEXT: mov z0.h, p0/m, h1
+; VBITS_GE_512-NEXT: mov z0.h, p2/m, h1
; VBITS_GE_512-NEXT: st1h { z0.h }, p1, [x1]
; VBITS_GE_512-NEXT: ret
%op1 = load <32 x half>, ptr %a
@@ -98,10 +98,10 @@ define void @insertelement_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: ptrue p1.h, vl64
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: cmpeq p2.h, p0/z, z0.h, z1.h
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: fmov h1, #5.00000000
-; CHECK-NEXT: mov z0.h, p0/m, h1
+; CHECK-NEXT: mov z0.h, p2/m, h1
; CHECK-NEXT: st1h { z0.h }, p1, [x1]
; CHECK-NEXT: ret
%op1 = load <64 x half>, ptr %a
@@ -118,10 +118,10 @@ define void @insertelement_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: ptrue p1.h, vl128
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: cmpeq p2.h, p0/z, z0.h, z1.h
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: fmov h1, #5.00000000
-; CHECK-NEXT: mov z0.h, p0/m, h1
+; CHECK-NEXT: mov z0.h, p2/m, h1
; CHECK-NEXT: st1h { z0.h }, p1, [x1]
; CHECK-NEXT: ret
%op1 = load <128 x half>, ptr %a
@@ -162,10 +162,10 @@ define void @insertelement_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: ptrue p1.s, vl8
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpeq p2.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: fmov s1, #5.00000000
-; CHECK-NEXT: mov z0.s, p0/m, s1
+; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: st1w { z0.s }, p1, [x1]
; CHECK-NEXT: ret
%op1 = load <8 x float>, ptr %a
@@ -183,10 +183,10 @@ define void @insertelement_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: mov z1.s, w8
; VBITS_GE_256-NEXT: ptrue p1.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
-; VBITS_GE_256-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
+; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z0.s, z1.s
; VBITS_GE_256-NEXT: fmov s0, #5.00000000
; VBITS_GE_256-NEXT: ld1w { z1.s }, p1/z, [x0, x8, lsl #2]
-; VBITS_GE_256-NEXT: mov z1.s, p0/m, s0
+; VBITS_GE_256-NEXT: mov z1.s, p2/m, s0
; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x0]
; VBITS_GE_256-NEXT: st1w { z1.s }, p1, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z0.s }, p1, [x1]
@@ -199,10 +199,10 @@ define void @insertelement_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_512-NEXT: ptrue p0.s
; VBITS_GE_512-NEXT: mov z1.s, w8
; VBITS_GE_512-NEXT: ptrue p1.s, vl16
-; VBITS_GE_512-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
+; VBITS_GE_512-NEXT: cmpeq p2.s, p0/z, z0.s, z1.s
; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT: fmov s1, #5.00000000
-; VBITS_GE_512-NEXT: mov z0.s, p0/m, s1
+; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1
; VBITS_GE_512-NEXT: st1w { z0.s }, p1, [x1]
; VBITS_GE_512-NEXT: ret
%op1 = load <16 x float>, ptr %a
@@ -219,10 +219,10 @@ define void @insertelement_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: ptrue p1.s, vl32
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpeq p2.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: fmov s1, #5.00000000
-; CHECK-NEXT: mov z0.s, p0/m, s1
+; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: st1w { z0.s }, p1, [x1]
; CHECK-NEXT: ret
%op1 = load <32 x float>, ptr %a
@@ -239,10 +239,10 @@ define void @insertelement_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: ptrue p1.s, vl64
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpeq p2.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: fmov s1, #5.00000000
-; CHECK-NEXT: mov z0.s, p0/m, s1
+; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: st1w { z0.s }, p1, [x1]
; CHECK-NEXT: ret
%op1 = load <64 x float>, ptr %a
@@ -281,10 +281,10 @@ define void @insertelement_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: ptrue p1.d, vl4
-; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: cmpeq p2.d, p0/z, z0.d, z1.d
; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT: fmov d1, #5.00000000
-; CHECK-NEXT: mov z0.d, p0/m, d1
+; CHECK-NEXT: mov z0.d, p2/m, d1
; CHECK-NEXT: st1d { z0.d }, p1, [x1]
; CHECK-NEXT: ret
%op1 = load <4 x double>, ptr %a
@@ -302,10 +302,10 @@ define void @insertelement_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: mov z1.d, x8
; VBITS_GE_256-NEXT: ptrue p1.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
-; VBITS_GE_256-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z0.d, z1.d
; VBITS_GE_256-NEXT: fmov d0, #5.00000000
; VBITS_GE_256-NEXT: ld1d { z1.d }, p1/z, [x0, x8, lsl #3]
-; VBITS_GE_256-NEXT: mov z1.d, p0/m, d0
+; VBITS_GE_256-NEXT: mov z1.d, p2/m, d0
; VBITS_GE_256-NEXT: ld1d { z0.d }, p1/z, [x0]
; VBITS_GE_256-NEXT: st1d { z1.d }, p1, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z0.d }, p1, [x1]
@@ -318,10 +318,10 @@ define void @insertelement_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_512-NEXT: ptrue p0.d
; VBITS_GE_512-NEXT: mov z1.d, x8
; VBITS_GE_512-NEXT: ptrue p1.d, vl8
-; VBITS_GE_512-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; VBITS_GE_512-NEXT: cmpeq p2.d, p0/z, z0.d, z1.d
; VBITS_GE_512-NEXT: ld1d { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: fmov d1, #5.00000000
-; VBITS_GE_512-NEXT: mov z0.d, p0/m, d1
+; VBITS_GE_512-NEXT: mov z0.d, p2/m, d1
; VBITS_GE_512-NEXT: st1d { z0.d }, p1, [x1]
; VBITS_GE_512-NEXT: ret
%op1 = load <8 x double>, ptr %a
@@ -338,10 +338,10 @@ define void @insertelement_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: ptrue p1.d, vl16
-; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: cmpeq p2.d, p0/z, z0.d, z1.d
; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT: fmov d1, #5.00000000
-; CHECK-NEXT: mov z0.d, p0/m, d1
+; CHECK-NEXT: mov z0.d, p2/m, d1
; CHECK-NEXT: st1d { z0.d }, p1, [x1]
; CHECK-NEXT: ret
%op1 = load <16 x double>, ptr %a
@@ -358,10 +358,10 @@ define void @insertelement_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: ptrue p1.d, vl32
-; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: cmpeq p2.d, p0/z, z0.d, z1.d
; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT: fmov d1, #5.00000000
-; CHECK-NEXT: mov z0.d, p0/m, d1
+; CHECK-NEXT: mov z0.d, p2/m, d1
; CHECK-NEXT: st1d { z0.d }, p1, [x1]
; CHECK-NEXT: ret
%op1 = load <32 x double>, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
index 37396ba7011be..de6689dfa7e3c 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
@@ -37,10 +37,10 @@ define void @select_v32i8(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-NEXT: mov z0.b, w2
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ptrue p1.b, vl32
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpne p2.b, p0/z, z0.b, #0
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p1/z, [x1]
-; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: sel z0.b, p2, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <32 x i8>, ptr %a
@@ -57,13 +57,13 @@ define void @select_v64i8(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-NEXT: ptrue p0.b
; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
; VBITS_GE_256-NEXT: ptrue p1.b, vl32
-; VBITS_GE_256-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; VBITS_GE_256-NEXT: cmpne p2.b, p0/z, z0.b, #0
; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x0, x8]
; VBITS_GE_256-NEXT: ld1b { z1.b }, p1/z, [x0]
; VBITS_GE_256-NEXT: ld1b { z2.b }, p1/z, [x1, x8]
; VBITS_GE_256-NEXT: ld1b { z3.b }, p1/z, [x1]
-; VBITS_GE_256-NEXT: sel z0.b, p0, z0.b, z2.b
-; VBITS_GE_256-NEXT: sel z1.b, p0, z1.b, z3.b
+; VBITS_GE_256-NEXT: sel z0.b, p2, z0.b, z2.b
+; VBITS_GE_256-NEXT: sel z1.b, p2, z1.b, z3.b
; VBITS_GE_256-NEXT: st1b { z0.b }, p1, [x0, x8]
; VBITS_GE_256-NEXT: st1b { z1.b }, p1, [x0]
; VBITS_GE_256-NEXT: ret
@@ -73,10 +73,10 @@ define void @select_v64i8(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_512-NEXT: mov z0.b, w2
; VBITS_GE_512-NEXT: ptrue p0.b
; VBITS_GE_512-NEXT: ptrue p1.b, vl64
-; VBITS_GE_512-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; VBITS_GE_512-NEXT: cmpne p2.b, p0/z, z0.b, #0
; VBITS_GE_512-NEXT: ld1b { z0.b }, p1/z, [x0]
; VBITS_GE_512-NEXT: ld1b { z1.b }, p1/z, [x1]
-; VBITS_GE_512-NEXT: sel z0.b, p0, z0.b, z1.b
+; VBITS_GE_512-NEXT: sel z0.b, p2, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p1, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load volatile <64 x i8>, ptr %a
@@ -92,10 +92,10 @@ define void @select_v128i8(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-NEXT: mov z0.b, w2
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ptrue p1.b, vl128
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpne p2.b, p0/z, z0.b, #0
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p1/z, [x1]
-; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: sel z0.b, p2, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <128 x i8>, ptr %a
@@ -111,10 +111,10 @@ define void @select_v256i8(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-NEXT: mov z0.b, w2
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ptrue p1.b, vl256
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpne p2.b, p0/z, z0.b, #0
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p1/z, [x1]
-; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: sel z0.b, p2, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <256 x i8>, ptr %a
@@ -157,10 +157,10 @@ define void @select_v16i16(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ptrue p1.h, vl16
; CHECK-NEXT: and z0.h, z0.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpne p2.h, p0/z, z0.h, #0
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p1/z, [x1]
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: sel z0.h, p2, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <16 x i16>, ptr %a
@@ -178,13 +178,13 @@ define void @select_v32i16(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ptrue p1.h, vl16
; VBITS_GE_256-NEXT: and z0.h, z0.h, #0x1
-; VBITS_GE_256-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; VBITS_GE_256-NEXT: cmpne p2.h, p0/z, z0.h, #0
; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p1/z, [x0]
; VBITS_GE_256-NEXT: ld1h { z2.h }, p1/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z3.h }, p1/z, [x1]
-; VBITS_GE_256-NEXT: sel z0.h, p0, z0.h, z2.h
-; VBITS_GE_256-NEXT: sel z1.h, p0, z1.h, z3.h
+; VBITS_GE_256-NEXT: sel z0.h, p2, z0.h, z2.h
+; VBITS_GE_256-NEXT: sel z1.h, p2, z1.h, z3.h
; VBITS_GE_256-NEXT: st1h { z0.h }, p1, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p1, [x0]
; VBITS_GE_256-NEXT: ret
@@ -195,10 +195,10 @@ define void @select_v32i16(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_512-NEXT: ptrue p0.h
; VBITS_GE_512-NEXT: ptrue p1.h, vl32
; VBITS_GE_512-NEXT: and z0.h, z0.h, #0x1
-; VBITS_GE_512-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; VBITS_GE_512-NEXT: cmpne p2.h, p0/z, z0.h, #0
; VBITS_GE_512-NEXT: ld1h { z0.h }, p1/z, [x0]
; VBITS_GE_512-NEXT: ld1h { z1.h }, p1/z, [x1]
-; VBITS_GE_512-NEXT: sel z0.h, p0, z0.h, z1.h
+; VBITS_GE_512-NEXT: sel z0.h, p2, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p1, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load volatile <32 x i16>, ptr %a
@@ -215,10 +215,10 @@ define void @select_v64i16(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ptrue p1.h, vl64
; CHECK-NEXT: and z0.h, z0.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpne p2.h, p0/z, z0.h, #0
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p1/z, [x1]
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: sel z0.h, p2, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <64 x i16>, ptr %a
@@ -235,10 +235,10 @@ define void @select_v128i16(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ptrue p1.h, vl128
; CHECK-NEXT: and z0.h, z0.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpne p2.h, p0/z, z0.h, #0
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p1/z, [x1]
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: sel z0.h, p2, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <128 x i16>, ptr %a
@@ -281,10 +281,10 @@ define void @select_v8i32(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z0.s, w8
; CHECK-NEXT: ptrue p1.s, vl8
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpne p2.s, p0/z, z0.s, #0
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1]
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: sel z0.s, p2, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <8 x i32>, ptr %a
@@ -302,13 +302,13 @@ define void @select_v16i32(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-NEXT: mov z0.s, w8
; VBITS_GE_256-NEXT: ptrue p1.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
-; VBITS_GE_256-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; VBITS_GE_256-NEXT: cmpne p2.s, p0/z, z0.s, #0
; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p1/z, [x0]
; VBITS_GE_256-NEXT: ld1w { z2.s }, p1/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z3.s }, p1/z, [x1]
-; VBITS_GE_256-NEXT: sel z0.s, p0, z0.s, z2.s
-; VBITS_GE_256-NEXT: sel z1.s, p0, z1.s, z3.s
+; VBITS_GE_256-NEXT: sel z0.s, p2, z0.s, z2.s
+; VBITS_GE_256-NEXT: sel z1.s, p2, z1.s, z3.s
; VBITS_GE_256-NEXT: st1w { z0.s }, p1, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p1, [x0]
; VBITS_GE_256-NEXT: ret
@@ -319,10 +319,10 @@ define void @select_v16i32(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_512-NEXT: ptrue p0.s
; VBITS_GE_512-NEXT: mov z0.s, w8
; VBITS_GE_512-NEXT: ptrue p1.s, vl16
-; VBITS_GE_512-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; VBITS_GE_512-NEXT: cmpne p2.s, p0/z, z0.s, #0
; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT: ld1w { z1.s }, p1/z, [x1]
-; VBITS_GE_512-NEXT: sel z0.s, p0, z0.s, z1.s
+; VBITS_GE_512-NEXT: sel z0.s, p2, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p1, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load volatile <16 x i32>, ptr %a
@@ -339,10 +339,10 @@ define void @select_v32i32(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z0.s, w8
; CHECK-NEXT: ptrue p1.s, vl32
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpne p2.s, p0/z, z0.s, #0
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1]
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: sel z0.s, p2, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <32 x i32>, ptr %a
@@ -359,10 +359,10 @@ define void @select_v64i32(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z0.s, w8
; CHECK-NEXT: ptrue p1.s, vl64
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpne p2.s, p0/z, z0.s, #0
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1]
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: sel z0.s, p2, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <64 x i32>, ptr %a
@@ -406,10 +406,10 @@ define void @select_v4i64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: ptrue p1.d, vl4
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: cmpne p2.d, p0/z, z0.d, #0
; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: sel z0.d, p2, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <4 x i64>, ptr %a
@@ -428,13 +428,13 @@ define void @select_v8i64(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-NEXT: mov z0.d, x8
; VBITS_GE_256-NEXT: ptrue p1.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
-; VBITS_GE_256-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; VBITS_GE_256-NEXT: cmpne p2.d, p0/z, z0.d, #0
; VBITS_GE_256-NEXT: ld1d { z0.d }, p1/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p1/z, [x0]
; VBITS_GE_256-NEXT: ld1d { z2.d }, p1/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z3.d }, p1/z, [x1]
-; VBITS_GE_256-NEXT: sel z0.d, p0, z0.d, z2.d
-; VBITS_GE_256-NEXT: sel z1.d, p0, z1.d, z3.d
+; VBITS_GE_256-NEXT: sel z0.d, p2, z0.d, z2.d
+; VBITS_GE_256-NEXT: sel z1.d, p2, z1.d, z3.d
; VBITS_GE_256-NEXT: st1d { z0.d }, p1, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p1, [x0]
; VBITS_GE_256-NEXT: ret
@@ -446,10 +446,10 @@ define void @select_v8i64(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_512-NEXT: ptrue p0.d
; VBITS_GE_512-NEXT: mov z0.d, x8
; VBITS_GE_512-NEXT: ptrue p1.d, vl8
-; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; VBITS_GE_512-NEXT: cmpne p2.d, p0/z, z0.d, #0
; VBITS_GE_512-NEXT: ld1d { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: ld1d { z1.d }, p1/z, [x1]
-; VBITS_GE_512-NEXT: sel z0.d, p0, z0.d, z1.d
+; VBITS_GE_512-NEXT: sel z0.d, p2, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p1, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load volatile <8 x i64>, ptr %a
@@ -467,10 +467,10 @@ define void @select_v16i64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: ptrue p1.d, vl16
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: cmpne p2.d, p0/z, z0.d, #0
; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: sel z0.d, p2, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <16 x i64>, ptr %a
@@ -488,10 +488,10 @@ define void @select_v32i64(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: ptrue p1.d, vl32
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: cmpne p2.d, p0/z, z0.d, #0
; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: sel z0.d, p2, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load volatile <32 x i64>, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll
index 291cddf2b8912..551fad6584b2a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll
@@ -35,8 +35,8 @@ define void @select_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: st1b { z1.b }, p0, [x0]
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: st1b { z1.b }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
@@ -56,9 +56,9 @@ define void @select_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpne p1.b, p0/z, z0.b, z1.b
-; VBITS_GE_256-NEXT: cmpne p0.b, p0/z, z2.b, z3.b
+; VBITS_GE_256-NEXT: cmpne p2.b, p0/z, z2.b, z3.b
; VBITS_GE_256-NEXT: st1b { z1.b }, p1, [x0, x8]
-; VBITS_GE_256-NEXT: st1b { z3.b }, p0, [x0]
+; VBITS_GE_256-NEXT: st1b { z3.b }, p2, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: select_v64i8:
@@ -66,8 +66,8 @@ define void @select_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_512-NEXT: ptrue p0.b, vl64
; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1b { z1.b }, p0/z, [x1]
-; VBITS_GE_512-NEXT: cmpne p0.b, p0/z, z0.b, z1.b
-; VBITS_GE_512-NEXT: st1b { z1.b }, p0, [x0]
+; VBITS_GE_512-NEXT: cmpne p1.b, p0/z, z0.b, z1.b
+; VBITS_GE_512-NEXT: st1b { z1.b }, p1, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load <64 x i8>, ptr %a
%op2 = load <64 x i8>, ptr %b
@@ -83,8 +83,8 @@ define void @select_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: st1b { z1.b }, p0, [x0]
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: st1b { z1.b }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load <128 x i8>, ptr %a
%op2 = load <128 x i8>, ptr %b
@@ -100,8 +100,8 @@ define void @select_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: st1b { z1.b }, p0, [x0]
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: st1b { z1.b }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load <256 x i8>, ptr %a
%op2 = load <256 x i8>, ptr %b
@@ -142,8 +142,8 @@ define void @select_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: st1h { z1.h }, p0, [x0]
+; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: st1h { z1.h }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
@@ -163,9 +163,9 @@ define void @select_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpne p1.h, p0/z, z0.h, z1.h
-; VBITS_GE_256-NEXT: cmpne p0.h, p0/z, z2.h, z3.h
+; VBITS_GE_256-NEXT: cmpne p2.h, p0/z, z2.h, z3.h
; VBITS_GE_256-NEXT: st1h { z1.h }, p1, [x0, x8, lsl #1]
-; VBITS_GE_256-NEXT: st1h { z3.h }, p0, [x0]
+; VBITS_GE_256-NEXT: st1h { z3.h }, p2, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: select_v32i16:
@@ -173,8 +173,8 @@ define void @select_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
-; VBITS_GE_512-NEXT: cmpne p0.h, p0/z, z0.h, z1.h
-; VBITS_GE_512-NEXT: st1h { z1.h }, p0, [x0]
+; VBITS_GE_512-NEXT: cmpne p1.h, p0/z, z0.h, z1.h
+; VBITS_GE_512-NEXT: st1h { z1.h }, p1, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load <32 x i16>, ptr %a
%op2 = load <32 x i16>, ptr %b
@@ -190,8 +190,8 @@ define void @select_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: st1h { z1.h }, p0, [x0]
+; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: st1h { z1.h }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load <64 x i16>, ptr %a
%op2 = load <64 x i16>, ptr %b
@@ -207,8 +207,8 @@ define void @select_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: st1h { z1.h }, p0, [x0]
+; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: st1h { z1.h }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load <128 x i16>, ptr %a
%op2 = load <128 x i16>, ptr %b
@@ -249,8 +249,8 @@ define void @select_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: st1w { z1.s }, p0, [x0]
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: st1w { z1.s }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
@@ -270,9 +270,9 @@ define void @select_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpne p1.s, p0/z, z0.s, z1.s
-; VBITS_GE_256-NEXT: cmpne p0.s, p0/z, z2.s, z3.s
+; VBITS_GE_256-NEXT: cmpne p2.s, p0/z, z2.s, z3.s
; VBITS_GE_256-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2]
-; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x0]
+; VBITS_GE_256-NEXT: st1w { z3.s }, p2, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: select_v16i32:
@@ -280,8 +280,8 @@ define void @select_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
-; VBITS_GE_512-NEXT: cmpne p0.s, p0/z, z0.s, z1.s
-; VBITS_GE_512-NEXT: st1w { z1.s }, p0, [x0]
+; VBITS_GE_512-NEXT: cmpne p1.s, p0/z, z0.s, z1.s
+; VBITS_GE_512-NEXT: st1w { z1.s }, p1, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load <16 x i32>, ptr %a
%op2 = load <16 x i32>, ptr %b
@@ -297,8 +297,8 @@ define void @select_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: st1w { z1.s }, p0, [x0]
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: st1w { z1.s }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i32>, ptr %a
%op2 = load <32 x i32>, ptr %b
@@ -314,8 +314,8 @@ define void @select_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: st1w { z1.s }, p0, [x0]
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: st1w { z1.s }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load <64 x i32>, ptr %a
%op2 = load <64 x i32>, ptr %b
@@ -357,8 +357,8 @@ define void @select_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: st1d { z1.d }, p0, [x0]
+; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: st1d { z1.d }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
@@ -378,9 +378,9 @@ define void @select_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpne p1.d, p0/z, z0.d, z1.d
-; VBITS_GE_256-NEXT: cmpne p0.d, p0/z, z2.d, z3.d
+; VBITS_GE_256-NEXT: cmpne p2.d, p0/z, z2.d, z3.d
; VBITS_GE_256-NEXT: st1d { z1.d }, p1, [x0, x8, lsl #3]
-; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x0]
+; VBITS_GE_256-NEXT: st1d { z3.d }, p2, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: select_v8i64:
@@ -388,8 +388,8 @@ define void @select_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
-; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z0.d, z1.d
-; VBITS_GE_512-NEXT: st1d { z1.d }, p0, [x0]
+; VBITS_GE_512-NEXT: cmpne p1.d, p0/z, z0.d, z1.d
+; VBITS_GE_512-NEXT: st1d { z1.d }, p1, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load <8 x i64>, ptr %a
%op2 = load <8 x i64>, ptr %b
@@ -405,8 +405,8 @@ define void @select_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: st1d { z1.d }, p0, [x0]
+; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: st1d { z1.d }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i64>, ptr %a
%op2 = load <16 x i64>, ptr %b
@@ -422,8 +422,8 @@ define void @select_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: st1d { z1.d }, p0, [x0]
+; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: st1d { z1.d }, p1, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i64>, ptr %a
%op2 = load <32 x i64>, ptr %b
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll
index 69fb3d88af02a..b2af99bd96ab6 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll
@@ -13,8 +13,8 @@ define <16 x i8> @masked_load_v16i8(ptr %src, <16 x i1> %mask) {
; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.16b, v0.16b, #7
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%load = call <16 x i8> @llvm.masked.load.v16i8(ptr %src, i32 8, <16 x i1> %mask, <16 x i8> zeroinitializer)
@@ -27,8 +27,8 @@ define <8 x half> @masked_load_v8f16(ptr %src, <8 x i1> %mask) {
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: shl v0.8h, v0.8h, #15
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%load = call <8 x half> @llvm.masked.load.v8f16(ptr %src, i32 8, <8 x i1> %mask, <8 x half> zeroinitializer)
@@ -41,8 +41,8 @@ define <4 x float> @masked_load_v4f32(ptr %src, <4 x i1> %mask) {
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%load = call <4 x float> @llvm.masked.load.v4f32(ptr %src, i32 8, <4 x i1> %mask, <4 x float> zeroinitializer)
@@ -55,8 +55,8 @@ define <2 x double> @masked_load_v2f64(ptr %src, <2 x i1> %mask) {
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: shl v0.2d, v0.2d, #63
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%load = call <2 x double> @llvm.masked.load.v2f64(ptr %src, i32 8, <2 x i1> %mask, <2 x double> zeroinitializer)
@@ -70,9 +70,9 @@ define <2 x double> @masked_load_passthru_v2f64(ptr %src, <2 x i1> %mask, <2 x d
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: shl v0.2d, v0.2d, #63
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
+; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%load = call <2 x double> @llvm.masked.load.v2f64(ptr %src, i32 8, <2 x i1> %mask, <2 x double> %passthru)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-stores.ll
index 4570b50c96ad3..660213d1c7f33 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-stores.ll
@@ -13,9 +13,9 @@ define void @masked_store_v16i8(ptr %dst, <16 x i1> %mask) {
; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.16b, v0.16b, #7
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0
; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: st1b { z0.b }, p0, [x0]
+; CHECK-NEXT: st1b { z0.b }, p1, [x0]
; CHECK-NEXT: ret
call void @llvm.masked.store.v16i8(<16 x i8> zeroinitializer, ptr %dst, i32 8, <16 x i1> %mask)
ret void
@@ -27,9 +27,9 @@ define void @masked_store_v8f16(ptr %dst, <8 x i1> %mask) {
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: shl v0.8h, v0.8h, #15
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [x0]
; CHECK-NEXT: ret
call void @llvm.masked.store.v8f16(<8 x half> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask)
ret void
@@ -41,9 +41,9 @@ define void @masked_store_v4f32(ptr %dst, <4 x i1> %mask) {
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: st1w { z0.s }, p1, [x0]
; CHECK-NEXT: ret
call void @llvm.masked.store.v4f32(<4 x float> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask)
ret void
@@ -55,9 +55,9 @@ define void @masked_store_v2f64(ptr %dst, <2 x i1> %mask) {
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: shl v0.2d, v0.2d, #63
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: st1d { z0.d }, p1, [x0]
; CHECK-NEXT: ret
call void @llvm.masked.store.v2f64(<2 x double> zeroinitializer, ptr %dst, i32 8, <2 x i1> %mask)
ret void
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
index a0948339b27bc..7aaa475e021b3 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
@@ -18,10 +18,10 @@ define void @masked_gather_v2i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: cmeq v0.2s, v0.2s, #0
; CHECK-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
; CHECK-NEXT: ldr q0, [x1]
-; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: ld1b { z0.d }, p1/z, [z0.d]
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: st1b { z0.s }, p0, [x0]
; CHECK-NEXT: ret
@@ -139,12 +139,12 @@ define void @masked_gather_v32i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
-; CHECK-NEXT: ptrue p1.d, vl32
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1]
-; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: punpklo p0.h, p1.b
+; CHECK-NEXT: ptrue p1.d, vl32
; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1]
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: st1b { z0.d }, p1, [x0]
@@ -169,10 +169,10 @@ define void @masked_gather_v2i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: cmeq v0.2s, v0.2s, #0
; CHECK-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
; CHECK-NEXT: ldr q0, [x1]
-; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: ld1h { z0.d }, p1/z, [z0.d]
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: st1h { z0.s }, p0, [x0]
; CHECK-NEXT: ret
@@ -259,14 +259,14 @@ define void @masked_gather_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
-; CHECK-NEXT: ptrue p1.d, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1]
-; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
-; CHECK-NEXT: st1h { z0.d }, p1, [x0]
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: ptrue p0.d, vl16
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
+; CHECK-NEXT: punpklo p1.h, p1.b
+; CHECK-NEXT: punpklo p1.h, p1.b
+; CHECK-NEXT: ld1h { z0.d }, p1/z, [z0.d]
+; CHECK-NEXT: st1h { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%cval = load <16 x i16>, ptr %a
%ptrs = load <16 x ptr>, ptr %b
@@ -280,14 +280,14 @@ define void @masked_gather_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
-; CHECK-NEXT: ptrue p1.d, vl32
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1]
-; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
-; CHECK-NEXT: st1h { z0.d }, p1, [x0]
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: ptrue p0.d, vl32
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
+; CHECK-NEXT: punpklo p1.h, p1.b
+; CHECK-NEXT: punpklo p1.h, p1.b
+; CHECK-NEXT: ld1h { z0.d }, p1/z, [z0.d]
+; CHECK-NEXT: st1h { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%cval = load <32 x i16>, ptr %a
%ptrs = load <32 x ptr>, ptr %b
@@ -308,9 +308,9 @@ define void @masked_gather_v2i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-NEXT: cmeq v0.2d, v0.2d, #0
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
; CHECK-NEXT: ldr q0, [x1]
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT: ld1w { z0.d }, p1/z, [z0.d]
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
@@ -371,13 +371,13 @@ define void @masked_gather_v8i32(ptr %a, ptr %b) #0 {
; VBITS_GE_512-LABEL: masked_gather_v8i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl8
-; VBITS_GE_512-NEXT: ptrue p1.d, vl8
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
-; VBITS_GE_512-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; VBITS_GE_512-NEXT: ld1d { z0.d }, p1/z, [x1]
-; VBITS_GE_512-NEXT: punpklo p0.h, p0.b
-; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [z0.d]
-; VBITS_GE_512-NEXT: st1w { z0.d }, p1, [x0]
+; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; VBITS_GE_512-NEXT: ptrue p0.d, vl8
+; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1]
+; VBITS_GE_512-NEXT: punpklo p1.h, p1.b
+; VBITS_GE_512-NEXT: ld1w { z0.d }, p1/z, [z0.d]
+; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
%cval = load <8 x i32>, ptr %a
%ptrs = load <8 x ptr>, ptr %b
@@ -391,13 +391,13 @@ define void @masked_gather_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
-; CHECK-NEXT: ptrue p1.d, vl16
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1]
-; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
-; CHECK-NEXT: st1w { z0.d }, p1, [x0]
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: ptrue p0.d, vl16
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
+; CHECK-NEXT: punpklo p1.h, p1.b
+; CHECK-NEXT: ld1w { z0.d }, p1/z, [z0.d]
+; CHECK-NEXT: st1w { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%cval = load <16 x i32>, ptr %a
%ptrs = load <16 x ptr>, ptr %b
@@ -411,13 +411,13 @@ define void @masked_gather_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
-; CHECK-NEXT: ptrue p1.d, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1]
-; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
-; CHECK-NEXT: st1w { z0.d }, p1, [x0]
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: ptrue p0.d, vl32
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
+; CHECK-NEXT: punpklo p1.h, p1.b
+; CHECK-NEXT: ld1w { z0.d }, p1/z, [z0.d]
+; CHECK-NEXT: st1w { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%cval = load <32 x i32>, ptr %a
%ptrs = load <32 x ptr>, ptr %b
@@ -458,9 +458,9 @@ define void @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; CHECK-NEXT: ldr q0, [x1]
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [z0.d]
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%cval = load <2 x i64>, ptr %a
@@ -576,9 +576,9 @@ define void @masked_gather_v2f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: mov v0.h[1], w8
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: sunpklo z0.d, z0.s
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
; CHECK-NEXT: ldr q0, [x1]
-; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT: ld1h { z0.d }, p1/z, [z0.d]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: str s0, [x0]
@@ -715,9 +715,9 @@ define void @masked_gather_v2f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: fcmeq v0.2s, v0.2s, #0.0
; CHECK-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
; CHECK-NEXT: ldr q0, [x1]
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT: ld1w { z0.d }, p1/z, [z0.d]
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
@@ -867,9 +867,9 @@ define void @masked_gather_v2f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: fcmeq v0.2d, v0.2d, #0.0
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
; CHECK-NEXT: ldr q0, [x1]
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [z0.d]
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%cval = load <2 x double>, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
index 864a9f7987bdb..2a3a8d00641ac 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
@@ -21,8 +21,8 @@ define <2 x half> @masked_load_v2f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-NEXT: mov v0.h[0], v1.h[0]
; CHECK-NEXT: mov w8, v1.s[1]
; CHECK-NEXT: mov v0.h[1], w8
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%a = load <2 x half>, ptr %ap
@@ -39,8 +39,8 @@ define <2 x float> @masked_load_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: fcmeq v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%a = load <2 x float>, ptr %ap
@@ -57,8 +57,8 @@ define <4 x float> @masked_load_v4f32(ptr %ap, ptr %bp) vscale_range(1,0) #0 {
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: fcmeq v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%a = load <4 x float>, ptr %ap
@@ -372,9 +372,9 @@ define void @masked_load_sext_v32i8i16(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x1]
-; VBITS_GE_256-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0]
+; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x0]
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: sunpklo z0.h, z0.b
@@ -405,9 +405,9 @@ define void @masked_load_sext_v16i8i32(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: ptrue p0.b, vl16
; VBITS_GE_256-NEXT: ldr q0, [x1]
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
-; VBITS_GE_256-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0]
+; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x0]
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: sunpklo z0.h, z0.b
; VBITS_GE_256-NEXT: sunpklo z1.h, z1.b
@@ -439,9 +439,9 @@ define void @masked_load_sext_v8i8i64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: ptrue p0.b, vl8
; VBITS_GE_256-NEXT: ldr d0, [x1]
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
-; VBITS_GE_256-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0]
+; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x0]
; VBITS_GE_256-NEXT: sshll v0.8h, v0.8b, #0
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
@@ -474,9 +474,9 @@ define void @masked_load_sext_v16i16i32(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x1]
-; VBITS_GE_256-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
+; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x0]
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
@@ -507,9 +507,9 @@ define void @masked_load_sext_v8i16i64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: ptrue p0.h, vl8
; VBITS_GE_256-NEXT: ldr q0, [x1]
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
-; VBITS_GE_256-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
+; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x0]
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
@@ -541,9 +541,9 @@ define void @masked_load_sext_v8i32i64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1]
-; VBITS_GE_256-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
+; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x0]
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
@@ -574,9 +574,9 @@ define void @masked_load_zext_v32i8i16(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x1]
-; VBITS_GE_256-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0]
+; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x0]
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.h, z0.b
@@ -607,9 +607,9 @@ define void @masked_load_zext_v16i8i32(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: ptrue p0.b, vl16
; VBITS_GE_256-NEXT: ldr q0, [x1]
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
-; VBITS_GE_256-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0]
+; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x0]
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: uunpklo z0.h, z0.b
; VBITS_GE_256-NEXT: uunpklo z1.h, z1.b
@@ -641,9 +641,9 @@ define void @masked_load_zext_v8i8i64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: ptrue p0.b, vl8
; VBITS_GE_256-NEXT: ldr d0, [x1]
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
-; VBITS_GE_256-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0]
+; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x0]
; VBITS_GE_256-NEXT: ushll v0.8h, v0.8b, #0
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
@@ -676,9 +676,9 @@ define void @masked_load_zext_v16i16i32(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x1]
-; VBITS_GE_256-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
+; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x0]
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
@@ -709,9 +709,9 @@ define void @masked_load_zext_v8i16i64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: ptrue p0.h, vl8
; VBITS_GE_256-NEXT: ldr q0, [x1]
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
-; VBITS_GE_256-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
+; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x0]
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
@@ -743,9 +743,9 @@ define void @masked_load_zext_v8i32i64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1]
-; VBITS_GE_256-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
+; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x0]
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
@@ -786,8 +786,8 @@ define void @masked_load_sext_v32i8i16_m16(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
; VBITS_GE_256-NEXT: splice z1.b, p1, z1.b, z0.b
; VBITS_GE_256-NEXT: ptrue p1.b, vl32
-; VBITS_GE_256-NEXT: cmpne p1.b, p1/z, z1.b, #0
-; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x0]
+; VBITS_GE_256-NEXT: cmpne p2.b, p1/z, z1.b, #0
+; VBITS_GE_256-NEXT: ld1b { z0.b }, p2/z, [x0]
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: sunpklo z0.h, z0.b
@@ -829,8 +829,8 @@ define void @masked_load_sext_v16i8i32_m32(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
-; VBITS_GE_256-NEXT: cmpne p1.b, p1/z, z1.b, #0
-; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x0]
+; VBITS_GE_256-NEXT: cmpne p2.b, p1/z, z1.b, #0
+; VBITS_GE_256-NEXT: ld1b { z0.b }, p2/z, [x0]
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: sunpklo z0.h, z0.b
; VBITS_GE_256-NEXT: sunpklo z1.h, z1.b
@@ -874,8 +874,8 @@ define void @masked_load_sext_v8i8i64_m64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: ptrue p1.b, vl8
; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
-; VBITS_GE_256-NEXT: cmpne p1.b, p1/z, z0.b, #0
-; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x0]
+; VBITS_GE_256-NEXT: cmpne p2.b, p1/z, z0.b, #0
+; VBITS_GE_256-NEXT: ld1b { z0.b }, p2/z, [x0]
; VBITS_GE_256-NEXT: sshll v0.8h, v0.8b, #0
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
@@ -920,8 +920,8 @@ define void @masked_load_sext_v16i16i32_m32(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
; VBITS_GE_256-NEXT: sunpklo z0.h, z1.b
-; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z0.h, #0
-; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x0]
+; VBITS_GE_256-NEXT: cmpne p2.h, p1/z, z0.h, #0
+; VBITS_GE_256-NEXT: ld1h { z0.h }, p2/z, [x0]
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
@@ -963,8 +963,8 @@ define void @masked_load_sext_v8i16i64_m64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
; VBITS_GE_256-NEXT: ptrue p1.h, vl8
; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
-; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z0.h, #0
-; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x0]
+; VBITS_GE_256-NEXT: cmpne p2.h, p1/z, z0.h, #0
+; VBITS_GE_256-NEXT: ld1h { z0.h }, p2/z, [x0]
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
@@ -1006,8 +1006,8 @@ define void @masked_load_sext_v8i32i64_m64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
; VBITS_GE_256-NEXT: ptrue p1.s, vl8
-; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z1.s, #0
-; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x0]
+; VBITS_GE_256-NEXT: cmpne p2.s, p1/z, z1.s, #0
+; VBITS_GE_256-NEXT: ld1w { z0.s }, p2/z, [x0]
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
@@ -1048,8 +1048,8 @@ define void @masked_load_zext_v32i8i16_m16(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
; VBITS_GE_256-NEXT: splice z1.b, p1, z1.b, z0.b
; VBITS_GE_256-NEXT: ptrue p1.b, vl32
-; VBITS_GE_256-NEXT: cmpne p1.b, p1/z, z1.b, #0
-; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x0]
+; VBITS_GE_256-NEXT: cmpne p2.b, p1/z, z1.b, #0
+; VBITS_GE_256-NEXT: ld1b { z0.b }, p2/z, [x0]
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.h, z0.b
@@ -1091,8 +1091,8 @@ define void @masked_load_zext_v16i8i32_m32(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
-; VBITS_GE_256-NEXT: cmpne p1.b, p1/z, z1.b, #0
-; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x0]
+; VBITS_GE_256-NEXT: cmpne p2.b, p1/z, z1.b, #0
+; VBITS_GE_256-NEXT: ld1b { z0.b }, p2/z, [x0]
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: uunpklo z0.h, z0.b
; VBITS_GE_256-NEXT: uunpklo z1.h, z1.b
@@ -1136,8 +1136,8 @@ define void @masked_load_zext_v8i8i64_m64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: ptrue p1.b, vl8
; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
-; VBITS_GE_256-NEXT: cmpne p1.b, p1/z, z0.b, #0
-; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x0]
+; VBITS_GE_256-NEXT: cmpne p2.b, p1/z, z0.b, #0
+; VBITS_GE_256-NEXT: ld1b { z0.b }, p2/z, [x0]
; VBITS_GE_256-NEXT: ushll v0.8h, v0.8b, #0
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
@@ -1182,8 +1182,8 @@ define void @masked_load_zext_v16i16i32_m32(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
; VBITS_GE_256-NEXT: sunpklo z0.h, z1.b
-; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z0.h, #0
-; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x0]
+; VBITS_GE_256-NEXT: cmpne p2.h, p1/z, z0.h, #0
+; VBITS_GE_256-NEXT: ld1h { z0.h }, p2/z, [x0]
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
@@ -1225,8 +1225,8 @@ define void @masked_load_zext_v8i16i64_m64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
; VBITS_GE_256-NEXT: ptrue p1.h, vl8
; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
-; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z0.h, #0
-; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x0]
+; VBITS_GE_256-NEXT: cmpne p2.h, p1/z, z0.h, #0
+; VBITS_GE_256-NEXT: ld1h { z0.h }, p2/z, [x0]
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
@@ -1268,8 +1268,8 @@ define void @masked_load_zext_v8i32i64_m64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
; VBITS_GE_256-NEXT: ptrue p1.s, vl8
-; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z1.s, #0
-; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x0]
+; VBITS_GE_256-NEXT: cmpne p2.s, p1/z, z1.s, #0
+; VBITS_GE_256-NEXT: ld1w { z0.s }, p2/z, [x0]
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
@@ -1504,9 +1504,9 @@ define void @masked_load_sext_ugt_v8i32i64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1]
-; VBITS_GE_256-NEXT: cmpne p0.s, p0/z, z0.s, #0
-; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
+; VBITS_GE_256-NEXT: cmpne p1.s, p0/z, z0.s, #0
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x0]
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
@@ -1537,9 +1537,9 @@ define void @masked_load_zext_sgt_v8i32i64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1]
-; VBITS_GE_256-NEXT: cmpgt p0.s, p0/z, z0.s, #0
-; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
+; VBITS_GE_256-NEXT: cmpgt p1.s, p0/z, z0.s, #0
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x0]
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
index 4fb3bf7392d4e..2d9ff913a1e9d 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
@@ -19,9 +19,9 @@ define void @masked_scatter_v2i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: cmeq v1.2s, v0.2s, #0
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-NEXT: sshll v1.2d, v1.2s, #0
-; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
+; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: st1b { z0.d }, p0, [z1.d]
+; CHECK-NEXT: st1b { z0.d }, p1, [z1.d]
; CHECK-NEXT: ret
%vals = load <2 x i8>, ptr %a
%ptrs = load <2 x ptr>, ptr %b
@@ -78,9 +78,9 @@ define void @masked_scatter_v8i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: cmpne p1.d, p0/z, z2.d, #0
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1]
-; VBITS_GE_256-NEXT: cmpne p0.d, p0/z, z1.d, #0
; VBITS_GE_256-NEXT: st1b { z3.d }, p1, [z2.d]
-; VBITS_GE_256-NEXT: st1b { z0.d }, p0, [z4.d]
+; VBITS_GE_256-NEXT: cmpne p1.d, p0/z, z1.d, #0
+; VBITS_GE_256-NEXT: st1b { z0.d }, p1, [z4.d]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_scatter_v8i8:
@@ -132,14 +132,14 @@ define void @masked_scatter_v32i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
-; CHECK-NEXT: ptrue p1.d, vl32
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: uunpklo z1.h, z0.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; CHECK-NEXT: uunpklo z0.s, z1.h
+; CHECK-NEXT: punpklo p0.h, p1.b
+; CHECK-NEXT: ptrue p1.d, vl32
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
-; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: st1b { z0.d }, p0, [z1.d]
@@ -164,9 +164,9 @@ define void @masked_scatter_v2i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: cmeq v1.2s, v0.2s, #0
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-NEXT: sshll v1.2d, v1.2s, #0
-; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
+; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
+; CHECK-NEXT: st1h { z0.d }, p1, [z1.d]
; CHECK-NEXT: ret
%vals = load <2 x i16>, ptr %a
%ptrs = load <2 x ptr>, ptr %b
@@ -208,17 +208,17 @@ define void @masked_scatter_v8i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: sunpklo z2.s, z1.h
; VBITS_GE_256-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: uunpklo z3.d, z3.s
-; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
+; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: sunpklo z2.d, z2.s
+; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
-; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: cmpne p1.d, p0/z, z2.d, #0
+; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1]
-; VBITS_GE_256-NEXT: cmpne p0.d, p0/z, z1.d, #0
; VBITS_GE_256-NEXT: st1h { z3.d }, p1, [z2.d]
-; VBITS_GE_256-NEXT: st1h { z0.d }, p0, [z4.d]
+; VBITS_GE_256-NEXT: cmpne p1.d, p0/z, z1.d, #0
+; VBITS_GE_256-NEXT: st1h { z0.d }, p1, [z4.d]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_scatter_v8i16:
@@ -245,14 +245,14 @@ define void @masked_scatter_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_scatter_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
-; CHECK-NEXT: ptrue p1.d, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: ptrue p0.d, vl16
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: punpklo p1.h, p1.b
+; CHECK-NEXT: punpklo p0.h, p1.b
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
%vals = load <16 x i16>, ptr %a
@@ -266,14 +266,14 @@ define void @masked_scatter_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
-; CHECK-NEXT: ptrue p1.d, vl32
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: ptrue p0.d, vl32
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: punpklo p1.h, p1.b
+; CHECK-NEXT: punpklo p0.h, p1.b
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
%vals = load <32 x i16>, ptr %a
@@ -294,9 +294,9 @@ define void @masked_scatter_v2i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-NEXT: cmeq v1.2d, v0.2d, #0
-; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
+; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
+; CHECK-NEXT: st1w { z0.d }, p1, [z1.d]
; CHECK-NEXT: ret
%vals = load <2 x i32>, ptr %a
%ptrs = load <2 x ptr>, ptr %b
@@ -329,33 +329,33 @@ define void @masked_scatter_v8i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
-; VBITS_GE_256-NEXT: ptrue p1.d, vl4
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
-; VBITS_GE_256-NEXT: ld1d { z3.d }, p1/z, [x1]
-; VBITS_GE_256-NEXT: ld1d { z4.d }, p1/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT: cmpeq p0.s, p0/z, z0.s, #0
+; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: uunpklo z2.d, z0.s
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
+; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
+; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1, x8, lsl #3]
+; VBITS_GE_256-NEXT: mov z1.s, p1/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: punpklo p1.h, p1.b
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
-; VBITS_GE_256-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: punpklo p0.h, p0.b
-; VBITS_GE_256-NEXT: and p0.b, p0/z, p0.b, p1.b
+; VBITS_GE_256-NEXT: and p1.b, p1/z, p1.b, p0.b
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
-; VBITS_GE_256-NEXT: st1w { z2.d }, p0, [z3.d]
+; VBITS_GE_256-NEXT: st1w { z2.d }, p1, [z3.d]
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
-; VBITS_GE_256-NEXT: cmpne p0.d, p1/z, z1.d, #0
-; VBITS_GE_256-NEXT: st1w { z0.d }, p0, [z4.d]
+; VBITS_GE_256-NEXT: cmpne p1.d, p0/z, z1.d, #0
+; VBITS_GE_256-NEXT: st1w { z0.d }, p1, [z4.d]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_scatter_v8i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl8
-; VBITS_GE_512-NEXT: ptrue p1.d, vl8
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
-; VBITS_GE_512-NEXT: ld1d { z1.d }, p1/z, [x1]
-; VBITS_GE_512-NEXT: cmpeq p0.s, p0/z, z0.s, #0
+; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
-; VBITS_GE_512-NEXT: punpklo p0.h, p0.b
+; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
+; VBITS_GE_512-NEXT: punpklo p0.h, p1.b
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [z1.d]
; VBITS_GE_512-NEXT: ret
%vals = load <8 x i32>, ptr %a
@@ -369,12 +369,12 @@ define void @masked_scatter_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_scatter_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
-; CHECK-NEXT: ptrue p1.d, vl16
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
+; CHECK-NEXT: punpklo p0.h, p1.b
; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
%vals = load <16 x i32>, ptr %a
@@ -388,12 +388,12 @@ define void @masked_scatter_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_scatter_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
-; CHECK-NEXT: ptrue p1.d, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
+; CHECK-NEXT: punpklo p0.h, p1.b
; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
%vals = load <32 x i32>, ptr %a
@@ -433,8 +433,8 @@ define void @masked_scatter_v2i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
+; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, #0
+; CHECK-NEXT: st1d { z0.d }, p1, [z1.d]
; CHECK-NEXT: ret
%vals = load <2 x i64>, ptr %a
%ptrs = load <2 x ptr>, ptr %b
@@ -465,13 +465,13 @@ define void @masked_scatter_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0]
-; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
+; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0
-; VBITS_GE_256-NEXT: cmpeq p0.d, p0/z, z2.d, #0
; VBITS_GE_256-NEXT: st1d { z0.d }, p1, [z1.d]
-; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [z3.d]
+; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z2.d, #0
+; VBITS_GE_256-NEXT: st1d { z2.d }, p1, [z3.d]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_scatter_v8i64:
@@ -539,10 +539,10 @@ define void @masked_scatter_v2f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: mov v0.h[1], w8
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: sunpklo z0.d, z0.s
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
; CHECK-NEXT: uunpklo z0.d, z1.s
; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
+; CHECK-NEXT: st1h { z0.d }, p1, [z1.d]
; CHECK-NEXT: ret
%vals = load <2 x half>, ptr %a
%ptrs = load <2 x ptr>, ptr %b
@@ -584,17 +584,17 @@ define void @masked_scatter_v8f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: sunpklo z2.s, z1.h
; VBITS_GE_256-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: uunpklo z3.d, z3.s
-; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
+; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: sunpklo z2.d, z2.s
+; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
-; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: cmpne p1.d, p0/z, z2.d, #0
+; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1]
-; VBITS_GE_256-NEXT: cmpne p0.d, p0/z, z1.d, #0
; VBITS_GE_256-NEXT: st1h { z3.d }, p1, [z2.d]
-; VBITS_GE_256-NEXT: st1h { z0.d }, p0, [z4.d]
+; VBITS_GE_256-NEXT: cmpne p1.d, p0/z, z1.d, #0
+; VBITS_GE_256-NEXT: st1h { z0.d }, p1, [z4.d]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_scatter_v8f16:
@@ -671,9 +671,9 @@ define void @masked_scatter_v2f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: fcmeq v1.2s, v0.2s, #0.0
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-NEXT: sshll v1.2d, v1.2s, #0
-; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
+; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
+; CHECK-NEXT: st1w { z0.d }, p1, [z1.d]
; CHECK-NEXT: ret
%vals = load <2 x float>, ptr %a
%ptrs = load <2 x ptr>, ptr %b
@@ -810,9 +810,9 @@ define void @masked_scatter_v2f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: fcmeq v1.2d, v0.2d, #0.0
-; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
+; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
+; CHECK-NEXT: st1d { z0.d }, p1, [z1.d]
; CHECK-NEXT: ret
%vals = load <2 x double>, ptr %a
%ptrs = load <2 x ptr>, ptr %b
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
index b0d4f79aea110..fb3a2ae005a08 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
@@ -21,8 +21,8 @@ define void @masked_store_v2f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-NEXT: mov v0.h[0], v2.h[0]
; CHECK-NEXT: mov w8, v2.s[1]
; CHECK-NEXT: mov v0.h[1], w8
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: st1h { z1.h }, p0, [x1]
+; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: st1h { z1.h }, p1, [x1]
; CHECK-NEXT: ret
%a = load <2 x half>, ptr %ap
%b = load <2 x half>, ptr %bp
@@ -38,8 +38,8 @@ define void @masked_store_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: fcmeq v1.2s, v0.2s, v1.2s
-; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT: st1w { z0.s }, p0, [x1]
+; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; CHECK-NEXT: st1w { z0.s }, p1, [x1]
; CHECK-NEXT: ret
%a = load <2 x float>, ptr %ap
%b = load <2 x float>, ptr %bp
@@ -55,8 +55,8 @@ define void @masked_store_v4f32(ptr %ap, ptr %bp) vscale_range(1,0) #0 {
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: fcmeq v1.4s, v0.4s, v1.4s
-; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT: st1w { z0.s }, p0, [x1]
+; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; CHECK-NEXT: st1w { z0.s }, p1, [x1]
; CHECK-NEXT: ret
%a = load <4 x float>, ptr %ap
%b = load <4 x float>, ptr %bp
@@ -154,18 +154,18 @@ define void @masked_store_trunc_v8i64i8(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, z2.d
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
-; VBITS_GE_256-NEXT: cmpeq p0.d, p0/z, z1.d, z3.d
+; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, z3.d
+; VBITS_GE_256-NEXT: ptrue p0.s, vl4
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.s, vl8
-; VBITS_GE_256-NEXT: mov z3.d, p0/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: ptrue p0.s, vl4
-; VBITS_GE_256-NEXT: uzp1 z2.s, z2.s, z2.s
; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s
+; VBITS_GE_256-NEXT: mov z3.d, p2/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: uzp1 z2.s, z2.s, z2.s
; VBITS_GE_256-NEXT: uzp1 z3.s, z3.s, z3.s
; VBITS_GE_256-NEXT: splice z3.s, p0, z3.s, z2.s
-; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z3.s, #0
-; VBITS_GE_256-NEXT: st1b { z1.s }, p1, [x2]
+; VBITS_GE_256-NEXT: cmpne p2.s, p1/z, z3.s, #0
+; VBITS_GE_256-NEXT: st1b { z1.s }, p2, [x2]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_store_trunc_v8i64i8:
@@ -173,8 +173,8 @@ define void @masked_store_trunc_v8i64i8(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
-; VBITS_GE_512-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
-; VBITS_GE_512-NEXT: st1b { z0.d }, p0, [x2]
+; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
+; VBITS_GE_512-NEXT: st1b { z0.d }, p1, [x2]
; VBITS_GE_512-NEXT: ret
%a = load <8 x i64>, ptr %ap
%b = load <8 x i64>, ptr %bp
@@ -195,21 +195,21 @@ define void @masked_store_trunc_v8i64i16(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, z2.d
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
-; VBITS_GE_256-NEXT: cmpeq p0.d, p0/z, z1.d, z3.d
+; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, z3.d
+; VBITS_GE_256-NEXT: ptrue p0.s, vl4
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
-; VBITS_GE_256-NEXT: mov z3.d, p0/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: ptrue p0.s, vl4
+; VBITS_GE_256-NEXT: mov z3.d, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: uzp1 z2.s, z2.s, z2.s
; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
; VBITS_GE_256-NEXT: uzp1 z3.s, z3.s, z3.s
; VBITS_GE_256-NEXT: splice z3.s, p0, z3.s, z2.s
; VBITS_GE_256-NEXT: ptrue p0.h, vl8
; VBITS_GE_256-NEXT: uzp1 z2.h, z3.h, z3.h
-; VBITS_GE_256-NEXT: cmpne p0.h, p0/z, z2.h, #0
-; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x2]
+; VBITS_GE_256-NEXT: cmpne p1.h, p0/z, z2.h, #0
+; VBITS_GE_256-NEXT: st1h { z1.h }, p1, [x2]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_store_trunc_v8i64i16:
@@ -217,8 +217,8 @@ define void @masked_store_trunc_v8i64i16(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
-; VBITS_GE_512-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
-; VBITS_GE_512-NEXT: st1h { z0.d }, p0, [x2]
+; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
+; VBITS_GE_512-NEXT: st1h { z0.d }, p1, [x2]
; VBITS_GE_512-NEXT: ret
%a = load <8 x i64>, ptr %ap
%b = load <8 x i64>, ptr %bp
@@ -239,18 +239,18 @@ define void @masked_store_trunc_v8i64i32(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, z2.d
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
-; VBITS_GE_256-NEXT: cmpeq p0.d, p0/z, z1.d, z3.d
+; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, z3.d
+; VBITS_GE_256-NEXT: ptrue p0.s, vl4
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.s, vl8
-; VBITS_GE_256-NEXT: mov z3.d, p0/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: ptrue p0.s, vl4
-; VBITS_GE_256-NEXT: uzp1 z2.s, z2.s, z2.s
; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s
+; VBITS_GE_256-NEXT: mov z3.d, p2/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: uzp1 z2.s, z2.s, z2.s
; VBITS_GE_256-NEXT: uzp1 z3.s, z3.s, z3.s
; VBITS_GE_256-NEXT: splice z3.s, p0, z3.s, z2.s
-; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z3.s, #0
-; VBITS_GE_256-NEXT: st1w { z1.s }, p1, [x2]
+; VBITS_GE_256-NEXT: cmpne p2.s, p1/z, z3.s, #0
+; VBITS_GE_256-NEXT: st1w { z1.s }, p2, [x2]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_store_trunc_v8i64i32:
@@ -258,8 +258,8 @@ define void @masked_store_trunc_v8i64i32(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
-; VBITS_GE_512-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
-; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x2]
+; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
+; VBITS_GE_512-NEXT: st1w { z0.d }, p1, [x2]
; VBITS_GE_512-NEXT: ret
%a = load <8 x i64>, ptr %ap
%b = load <8 x i64>, ptr %bp
@@ -280,21 +280,21 @@ define void @masked_store_trunc_v16i32i8(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, z2.s
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
-; VBITS_GE_256-NEXT: cmpeq p0.s, p0/z, z1.s, z3.s
+; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z1.s, z3.s
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
+; VBITS_GE_256-NEXT: ptrue p0.b, vl16
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
-; VBITS_GE_256-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: ptrue p0.b, vl16
+; VBITS_GE_256-NEXT: mov z3.s, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: uzp1 z2.h, z2.h, z2.h
; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
; VBITS_GE_256-NEXT: uzp1 z3.h, z3.h, z3.h
; VBITS_GE_256-NEXT: uzp1 z2.b, z2.b, z2.b
; VBITS_GE_256-NEXT: uzp1 z3.b, z3.b, z3.b
; VBITS_GE_256-NEXT: mov v3.d[1], v2.d[0]
-; VBITS_GE_256-NEXT: cmpne p0.b, p0/z, z3.b, #0
-; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x2]
+; VBITS_GE_256-NEXT: cmpne p1.b, p0/z, z3.b, #0
+; VBITS_GE_256-NEXT: st1b { z1.b }, p1, [x2]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_store_trunc_v16i32i8:
@@ -302,8 +302,8 @@ define void @masked_store_trunc_v16i32i8(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
-; VBITS_GE_512-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
-; VBITS_GE_512-NEXT: st1b { z0.s }, p0, [x2]
+; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
+; VBITS_GE_512-NEXT: st1b { z0.s }, p1, [x2]
; VBITS_GE_512-NEXT: ret
%a = load <16 x i32>, ptr %ap
%b = load <16 x i32>, ptr %bp
@@ -324,12 +324,12 @@ define void @masked_store_trunc_v16i32i16(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, z2.s
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
-; VBITS_GE_256-NEXT: cmpeq p0.s, p0/z, z1.s, z3.s
+; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z1.s, z3.s
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
+; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.h, vl8
-; VBITS_GE_256-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov z3.s, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: splice z1.h, p1, z1.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z2.h, z2.h, z2.h
; VBITS_GE_256-NEXT: uzp1 z3.h, z3.h, z3.h
@@ -337,8 +337,8 @@ define void @masked_store_trunc_v16i32i16(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_256-NEXT: uzp1 z3.b, z3.b, z3.b
; VBITS_GE_256-NEXT: mov v3.d[1], v2.d[0]
; VBITS_GE_256-NEXT: sunpklo z2.h, z3.b
-; VBITS_GE_256-NEXT: cmpne p0.h, p0/z, z2.h, #0
-; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x2]
+; VBITS_GE_256-NEXT: cmpne p2.h, p0/z, z2.h, #0
+; VBITS_GE_256-NEXT: st1h { z1.h }, p2, [x2]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_store_trunc_v16i32i16:
@@ -346,8 +346,8 @@ define void @masked_store_trunc_v16i32i16(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
-; VBITS_GE_512-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
-; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x2]
+; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
+; VBITS_GE_512-NEXT: st1h { z0.s }, p1, [x2]
; VBITS_GE_512-NEXT: ret
%a = load <16 x i32>, ptr %ap
%b = load <16 x i32>, ptr %bp
@@ -368,18 +368,18 @@ define void @masked_store_trunc_v32i16i8(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z0.h, z2.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
-; VBITS_GE_256-NEXT: cmpeq p0.h, p0/z, z1.h, z3.h
+; VBITS_GE_256-NEXT: cmpeq p2.h, p0/z, z1.h, z3.h
+; VBITS_GE_256-NEXT: ptrue p0.b, vl16
; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
; VBITS_GE_256-NEXT: mov z2.h, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.b, vl32
-; VBITS_GE_256-NEXT: mov z3.h, p0/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: ptrue p0.b, vl16
-; VBITS_GE_256-NEXT: uzp1 z2.b, z2.b, z2.b
; VBITS_GE_256-NEXT: splice z1.b, p0, z1.b, z0.b
+; VBITS_GE_256-NEXT: mov z3.h, p2/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: uzp1 z2.b, z2.b, z2.b
; VBITS_GE_256-NEXT: uzp1 z3.b, z3.b, z3.b
; VBITS_GE_256-NEXT: splice z3.b, p0, z3.b, z2.b
-; VBITS_GE_256-NEXT: cmpne p1.b, p1/z, z3.b, #0
-; VBITS_GE_256-NEXT: st1b { z1.b }, p1, [x2]
+; VBITS_GE_256-NEXT: cmpne p2.b, p1/z, z3.b, #0
+; VBITS_GE_256-NEXT: st1b { z1.b }, p2, [x2]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_store_trunc_v32i16i8:
@@ -387,8 +387,8 @@ define void @masked_store_trunc_v32i16i8(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
-; VBITS_GE_512-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
-; VBITS_GE_512-NEXT: st1b { z0.h }, p0, [x2]
+; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
+; VBITS_GE_512-NEXT: st1b { z0.h }, p1, [x2]
; VBITS_GE_512-NEXT: ret
%a = load <32 x i16>, ptr %ap
%b = load <32 x i16>, ptr %bp
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
index 3e6a7ce34a9ae..ecdff4bdde910 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
@@ -400,8 +400,8 @@ define <8 x i1> @no_warn_dropped_scalable(ptr %in) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
index 777577e2d3b49..840564ddb86ea 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
@@ -98,8 +98,8 @@ define <vscale x 2 x i64> @test_lane4_2xi64(<vscale x 2 x i64> %a) {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: mov w8, #30 // =0x1e
-; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
-; CHECK-NEXT: mov z0.d, p0/m, x8
+; CHECK-NEXT: cmpeq p1.d, p0/z, z1.d, z2.d
+; CHECK-NEXT: mov z0.d, p1/m, x8
; CHECK-NEXT: ret
%b = insertelement <vscale x 2 x i64> %a, i64 30, i32 4
ret <vscale x 2 x i64> %b
@@ -113,9 +113,9 @@ define <vscale x 8 x half> @test_lane9_8xf16(<vscale x 8 x half> %a) {
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z2.h, w8
-; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: fmov h1, #1.00000000
-; CHECK-NEXT: mov z0.h, p0/m, h1
+; CHECK-NEXT: mov z0.h, p1/m, h1
; CHECK-NEXT: ret
%b = insertelement <vscale x 8 x half> %a, half 1.0, i32 9
ret <vscale x 8 x half> %b
@@ -128,8 +128,8 @@ define <vscale x 8 x bfloat> @test_lane9_8xbf16(<vscale x 8 x bfloat> %a, bfloat
; CHECK-NEXT: index z2.h, #0, #1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z3.h, w8
-; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h
-; CHECK-NEXT: mov z0.h, p0/m, h1
+; CHECK-NEXT: cmpeq p1.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: mov z0.h, p1/m, h1
; CHECK-NEXT: ret
%b = insertelement <vscale x 8 x bfloat> %a, bfloat %x, i32 9
ret <vscale x 8 x bfloat> %b
@@ -143,8 +143,8 @@ define <vscale x 16 x i8> @test_lane1_16xi8(<vscale x 16 x i8> %a) {
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: mov w8, #30 // =0x1e
-; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
-; CHECK-NEXT: mov z0.b, p0/m, w8
+; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
+; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: ret
%b = insertelement <vscale x 16 x i8> %a, i8 30, i32 1
ret <vscale x 16 x i8> %b
@@ -158,8 +158,8 @@ define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: mov w8, #30 // =0x1e
-; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
-; CHECK-NEXT: mov z0.b, p0/m, w8
+; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
+; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: ret
%b = insertelement <vscale x 16 x i8> %a, i8 30, i32 %x
ret <vscale x 16 x i8> %b
@@ -228,8 +228,8 @@ define <vscale x 16 x i8> @test_insert64_of_extract64_16xi8(<vscale x 16 x i8> %
; CHECK-NEXT: lastb w9, p0, z1.b
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
-; CHECK-NEXT: mov z0.b, p0/m, w9
+; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
+; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: ret
%c = extractelement <vscale x 16 x i8> %b, i32 64
%d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 64
@@ -244,8 +244,8 @@ define <vscale x 16 x i8> @test_insert3_of_extract1_16xi8(<vscale x 16 x i8> %a,
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z3.b, w8
; CHECK-NEXT: umov w8, v1.b[1]
-; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z3.b
-; CHECK-NEXT: mov z0.b, p0/m, w8
+; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
+; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: ret
%c = extractelement <vscale x 16 x i8> %b, i32 1
%d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 3
@@ -340,8 +340,8 @@ define <vscale x 2 x half> @test_insert_with_index_nxv2f16(<vscale x 2 x half> %
; CHECK-NEXT: index z2.d, #0, #1
; CHECK-NEXT: mov z3.d, x0
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
-; CHECK-NEXT: mov z0.h, p0/m, h1
+; CHECK-NEXT: cmpeq p1.d, p0/z, z2.d, z3.d
+; CHECK-NEXT: mov z0.h, p1/m, h1
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x half> %a, half %h, i64 %idx
ret <vscale x 2 x half> %res
@@ -353,8 +353,8 @@ define <vscale x 4 x half> @test_insert_with_index_nxv4f16(<vscale x 4 x half> %
; CHECK-NEXT: index z2.s, #0, #1
; CHECK-NEXT: mov z3.s, w0
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s
-; CHECK-NEXT: mov z0.h, p0/m, h1
+; CHECK-NEXT: cmpeq p1.s, p0/z, z2.s, z3.s
+; CHECK-NEXT: mov z0.h, p1/m, h1
; CHECK-NEXT: ret
%res = insertelement <vscale x 4 x half> %a, half %h, i64 %idx
ret <vscale x 4 x half> %res
@@ -366,8 +366,8 @@ define <vscale x 8 x half> @test_insert_with_index_nxv8f16(<vscale x 8 x half> %
; CHECK-NEXT: index z2.h, #0, #1
; CHECK-NEXT: mov z3.h, w0
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h
-; CHECK-NEXT: mov z0.h, p0/m, h1
+; CHECK-NEXT: cmpeq p1.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: mov z0.h, p1/m, h1
; CHECK-NEXT: ret
%res = insertelement <vscale x 8 x half> %a, half %h, i64 %idx
ret <vscale x 8 x half> %res
@@ -379,8 +379,8 @@ define <vscale x 2 x bfloat> @test_insert_with_index_nxv2bf16(<vscale x 2 x bflo
; CHECK-NEXT: index z2.d, #0, #1
; CHECK-NEXT: mov z3.d, x0
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
-; CHECK-NEXT: mov z0.h, p0/m, h1
+; CHECK-NEXT: cmpeq p1.d, p0/z, z2.d, z3.d
+; CHECK-NEXT: mov z0.h, p1/m, h1
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x bfloat> %a, bfloat %h, i64 %idx
ret <vscale x 2 x bfloat> %res
@@ -392,8 +392,8 @@ define <vscale x 4 x bfloat> @test_insert_with_index_nxv4bf16(<vscale x 4 x bflo
; CHECK-NEXT: index z2.s, #0, #1
; CHECK-NEXT: mov z3.s, w0
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s
-; CHECK-NEXT: mov z0.h, p0/m, h1
+; CHECK-NEXT: cmpeq p1.s, p0/z, z2.s, z3.s
+; CHECK-NEXT: mov z0.h, p1/m, h1
; CHECK-NEXT: ret
%res = insertelement <vscale x 4 x bfloat> %a, bfloat %h, i64 %idx
ret <vscale x 4 x bfloat> %res
@@ -405,8 +405,8 @@ define <vscale x 8 x bfloat> @test_insert_with_index_nxv8bf16(<vscale x 8 x bflo
; CHECK-NEXT: index z2.h, #0, #1
; CHECK-NEXT: mov z3.h, w0
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h
-; CHECK-NEXT: mov z0.h, p0/m, h1
+; CHECK-NEXT: cmpeq p1.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: mov z0.h, p1/m, h1
; CHECK-NEXT: ret
%res = insertelement <vscale x 8 x bfloat> %a, bfloat %h, i64 %idx
ret <vscale x 8 x bfloat> %res
@@ -418,8 +418,8 @@ define <vscale x 2 x float> @test_insert_with_index_nxv2f32(<vscale x 2 x float>
; CHECK-NEXT: index z2.d, #0, #1
; CHECK-NEXT: mov z3.d, x0
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
-; CHECK-NEXT: mov z0.s, p0/m, s1
+; CHECK-NEXT: cmpeq p1.d, p0/z, z2.d, z3.d
+; CHECK-NEXT: mov z0.s, p1/m, s1
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x float> %a, float %f, i64 %idx
ret <vscale x 2 x float> %res
@@ -431,8 +431,8 @@ define <vscale x 4 x float> @test_insert_with_index_nxv4f32(<vscale x 4 x float>
; CHECK-NEXT: index z2.s, #0, #1
; CHECK-NEXT: mov z3.s, w0
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s
-; CHECK-NEXT: mov z0.s, p0/m, s1
+; CHECK-NEXT: cmpeq p1.s, p0/z, z2.s, z3.s
+; CHECK-NEXT: mov z0.s, p1/m, s1
; CHECK-NEXT: ret
%res = insertelement <vscale x 4 x float> %a, float %f, i64 %idx
ret <vscale x 4 x float> %res
@@ -444,8 +444,8 @@ define <vscale x 2 x double> @test_insert_with_index_nxv2f64(<vscale x 2 x doubl
; CHECK-NEXT: index z2.d, #0, #1
; CHECK-NEXT: mov z3.d, x0
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
-; CHECK-NEXT: mov z0.d, p0/m, d1
+; CHECK-NEXT: cmpeq p1.d, p0/z, z2.d, z3.d
+; CHECK-NEXT: mov z0.d, p1/m, d1
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x double> %a, double %d, i64 %idx
ret <vscale x 2 x double> %res
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll
index a2061579c8c5e..1b6b1409d6016 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll
@@ -1039,8 +1039,8 @@ define <vscale x 1 x i1> @cmpne_ir_q(<vscale x 1 x i64> %a, <vscale x 1 x i64> %
; CHECK-LABEL: cmpne_ir_q:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: punpklo p0.h, p1.b
; CHECK-NEXT: ret
%out = icmp ne <vscale x 1 x i64> %a, %b
ret <vscale x 1 x i1> %out
@@ -1078,8 +1078,8 @@ define <vscale x 4 x i1> @predicated_icmp(<vscale x 4 x i32> %a, <vscale x 4 x i
; CHECK-LABEL: predicated_icmp:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: cmpge p0.s, p0/z, z2.s, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpge p0.s, p1/z, z2.s, z1.s
; CHECK-NEXT: ret
%icmp1 = icmp sgt <vscale x 4 x i32> %a, %b
%icmp2 = icmp sle <vscale x 4 x i32> %b, %c
diff --git a/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll b/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll
index e5ab956d09e8a..db87f889794b6 100644
--- a/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll
@@ -43,12 +43,12 @@ define void @test_post_ld1_int_fixed(ptr %data, i64 %idx, ptr %addr, ptr %res_pt
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: ldr z2, [x2]
; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ptrue p1.d, vl1
; CHECK-NEXT: ldr x9, [x0, x1, lsl #3]
-; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: mov z0.d, p1/m, x8
-; CHECK-NEXT: mov z2.d, p0/m, x9
+; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: mov z0.d, p0/m, x8
+; CHECK-NEXT: mov z2.d, p1/m, x9
; CHECK-NEXT: add z0.d, z0.d, z2.d
; CHECK-NEXT: str z0, [x3]
; CHECK-NEXT: ret
@@ -70,13 +70,13 @@ define void @test_post_ld1_double_fixed(ptr %data, i64 %idx, ptr %addr, ptr %res
; CHECK-NEXT: index z0.d, #0, #1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
-; CHECK-NEXT: ptrue p1.d, vl1
; CHECK-NEXT: ldr d2, [x0, x1, lsl #3]
-; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT: ldr z0, [x2]
; CHECK-NEXT: ldr d1, [x0]
-; CHECK-NEXT: sel z1.d, p1, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, p0/m, d2
+; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: sel z1.d, p0, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, p1/m, d2
; CHECK-NEXT: fadd z0.d, z1.d, z0.d
; CHECK-NEXT: str z0, [x3]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-load-compare-store.ll b/llvm/test/CodeGen/AArch64/sve-load-compare-store.ll
index 3a6e9818e07fa..dce53afc8c845 100644
--- a/llvm/test/CodeGen/AArch64/sve-load-compare-store.ll
+++ b/llvm/test/CodeGen/AArch64/sve-load-compare-store.ll
@@ -6,8 +6,8 @@ define void @sve_load_compare_store(ptr noalias nocapture noundef readonly %a, p
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
-; CHECK-NEXT: cmphs p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: st1b { z0.s }, p0, [x1]
+; CHECK-NEXT: cmphs p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: st1b { z0.s }, p1, [x1]
; CHECK-NEXT: ret
entry:
%0 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-compressstore.ll b/llvm/test/CodeGen/AArch64/sve-masked-compressstore.ll
index c698658afc8c4..df449f79cc9b5 100644
--- a/llvm/test/CodeGen/AArch64/sve-masked-compressstore.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-compressstore.ll
@@ -121,9 +121,9 @@ define void @test_compressstore_v2f64(ptr %p, <2 x double> %vec, <2 x i1> %mask)
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: shl v1.2d, v1.2d, #63
-; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT: cntp x8, p0, p0.d
-; CHECK-NEXT: compact z0.d, p0, z0.d
+; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
+; CHECK-NEXT: cntp x8, p1, p1.d
+; CHECK-NEXT: compact z0.d, p1, z0.d
; CHECK-NEXT: whilelo p0.d, xzr, x8
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
@@ -138,9 +138,9 @@ define void @test_compressstore_v4i32(ptr %p, <4 x i32> %vec, <4 x i1> %mask) {
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT: cntp x8, p0, p0.s
-; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; CHECK-NEXT: cntp x8, p1, p1.s
+; CHECK-NEXT: compact z0.s, p1, z0.s
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
@@ -155,9 +155,9 @@ define void @test_compressstore_v2i64(ptr %p, <2 x i64> %vec, <2 x i1> %mask) {
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: shl v1.2d, v1.2d, #63
-; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT: cntp x8, p0, p0.d
-; CHECK-NEXT: compact z0.d, p0, z0.d
+; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
+; CHECK-NEXT: cntp x8, p1, p1.d
+; CHECK-NEXT: compact z0.d, p1, z0.d
; CHECK-NEXT: whilelo p0.d, xzr, x8
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
@@ -183,20 +183,20 @@ define void @test_compressstore_v8i32(ptr %p, <8 x i32> %vec, <8 x i1> %mask) {
; CHECK-BASE-NEXT: shl v2.4s, v2.4s, #31
; CHECK-BASE-NEXT: shl v3.4s, v3.4s, #31
; CHECK-BASE-NEXT: and v4.8b, v4.8b, v5.8b
+; CHECK-BASE-NEXT: cmpne p2.s, p1/z, z3.s, #0
; CHECK-BASE-NEXT: addv h4, v4.4h
+; CHECK-BASE-NEXT: cntp x9, p2, p2.s
+; CHECK-BASE-NEXT: compact z0.s, p2, z0.s
; CHECK-BASE-NEXT: fmov w8, s4
; CHECK-BASE-NEXT: and w8, w8, #0xf
; CHECK-BASE-NEXT: fmov s4, w8
; CHECK-BASE-NEXT: cnt z4.s, p0/m, z4.s
; CHECK-BASE-NEXT: cmpne p0.s, p1/z, z2.s, #0
-; CHECK-BASE-NEXT: cmpne p1.s, p1/z, z3.s, #0
+; CHECK-BASE-NEXT: whilelo p1.s, xzr, x9
; CHECK-BASE-NEXT: cntp x8, p0, p0.s
; CHECK-BASE-NEXT: compact z1.s, p0, z1.s
-; CHECK-BASE-NEXT: compact z0.s, p1, z0.s
-; CHECK-BASE-NEXT: cntp x9, p1, p1.s
; CHECK-BASE-NEXT: fmov w10, s4
; CHECK-BASE-NEXT: whilelo p0.s, xzr, x8
-; CHECK-BASE-NEXT: whilelo p1.s, xzr, x9
; CHECK-BASE-NEXT: st1w { z1.s }, p0, [x0, x10, lsl #2]
; CHECK-BASE-NEXT: st1w { z0.s }, p1, [x0]
; CHECK-BASE-NEXT: ret
@@ -213,9 +213,9 @@ define void @test_compressstore_v8i32(ptr %p, <8 x i32> %vec, <8 x i1> %mask) {
; CHECK-VL256-NEXT: uunpklo z2.s, z2.h
; CHECK-VL256-NEXT: lsl z2.s, z2.s, #31
; CHECK-VL256-NEXT: asr z2.s, z2.s, #31
-; CHECK-VL256-NEXT: cmpne p0.s, p0/z, z2.s, #0
-; CHECK-VL256-NEXT: cntp x8, p0, p0.s
-; CHECK-VL256-NEXT: compact z0.s, p0, z0.s
+; CHECK-VL256-NEXT: cmpne p2.s, p0/z, z2.s, #0
+; CHECK-VL256-NEXT: cntp x8, p2, p2.s
+; CHECK-VL256-NEXT: compact z0.s, p2, z0.s
; CHECK-VL256-NEXT: whilelo p0.s, xzr, x8
; CHECK-VL256-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-VL256-NEXT: ret
@@ -241,18 +241,18 @@ define void @test_compressstore_v4i64(ptr %p, <4 x i64> %vec, <4 x i1> %mask) {
; CHECK-BASE-NEXT: shl v2.2d, v2.2d, #63
; CHECK-BASE-NEXT: fmov w8, s3
; CHECK-BASE-NEXT: shl v3.2d, v4.2d, #63
+; CHECK-BASE-NEXT: cmpne p2.d, p1/z, z2.d, #0
; CHECK-BASE-NEXT: and w8, w8, #0x3
; CHECK-BASE-NEXT: fmov s4, w8
+; CHECK-BASE-NEXT: cntp x9, p2, p2.d
+; CHECK-BASE-NEXT: compact z0.d, p2, z0.d
; CHECK-BASE-NEXT: cnt z4.s, p0/m, z4.s
; CHECK-BASE-NEXT: cmpne p0.d, p1/z, z3.d, #0
-; CHECK-BASE-NEXT: cmpne p1.d, p1/z, z2.d, #0
+; CHECK-BASE-NEXT: whilelo p1.d, xzr, x9
; CHECK-BASE-NEXT: cntp x8, p0, p0.d
; CHECK-BASE-NEXT: compact z1.d, p0, z1.d
-; CHECK-BASE-NEXT: compact z0.d, p1, z0.d
-; CHECK-BASE-NEXT: cntp x9, p1, p1.d
; CHECK-BASE-NEXT: fmov w10, s4
; CHECK-BASE-NEXT: whilelo p0.d, xzr, x8
-; CHECK-BASE-NEXT: whilelo p1.d, xzr, x9
; CHECK-BASE-NEXT: st1d { z1.d }, p0, [x0, x10, lsl #3]
; CHECK-BASE-NEXT: st1d { z0.d }, p1, [x0]
; CHECK-BASE-NEXT: ret
@@ -269,9 +269,9 @@ define void @test_compressstore_v4i64(ptr %p, <4 x i64> %vec, <4 x i1> %mask) {
; CHECK-VL256-NEXT: uunpklo z2.d, z2.s
; CHECK-VL256-NEXT: lsl z2.d, z2.d, #63
; CHECK-VL256-NEXT: asr z2.d, z2.d, #63
-; CHECK-VL256-NEXT: cmpne p0.d, p0/z, z2.d, #0
-; CHECK-VL256-NEXT: cntp x8, p0, p0.d
-; CHECK-VL256-NEXT: compact z0.d, p0, z0.d
+; CHECK-VL256-NEXT: cmpne p2.d, p0/z, z2.d, #0
+; CHECK-VL256-NEXT: cntp x8, p2, p2.d
+; CHECK-VL256-NEXT: compact z0.d, p2, z0.d
; CHECK-VL256-NEXT: whilelo p0.d, xzr, x8
; CHECK-VL256-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-VL256-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-match-cmp-predicate.ll b/llvm/test/CodeGen/AArch64/sve-match-cmp-predicate.ll
new file mode 100644
index 0000000000000..b3e9b1158f505
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-match-cmp-predicate.ll
@@ -0,0 +1,205 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s | FileCheck %s
+; RUN: llc -mattr=+disable-distinct-dst-reg-cmp-match < %s | FileCheck %s --check-prefix=DISABLE-HINT
+
+target triple = "aarch64"
+
+;
+; Test that the destination register of the match/cmp instruction is different
+; from the governing predicate register.
+;
+
+define void @match_hint_distinct_pg_b(ptr %ptr, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: match_hint_distinct_pg_b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: match p1.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: st1b { z0.b }, p1, [x0]
+; CHECK-NEXT: ret
+;
+; DISABLE-HINT-LABEL: match_hint_distinct_pg_b:
+; DISABLE-HINT: // %bb.0:
+; DISABLE-HINT-NEXT: ptrue p0.b
+; DISABLE-HINT-NEXT: match p0.b, p0/z, z0.b, z1.b
+; DISABLE-HINT-NEXT: st1b { z0.b }, p0, [x0]
+; DISABLE-HINT-NEXT: ret
+ %match = tail call <vscale x 16 x i1> @llvm.aarch64.sve.match.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ call void @llvm.masked.store(<vscale x 16 x i8> %a, ptr %ptr, <vscale x 16 x i1> %match)
+ ret void
+}
+
+define void @match_hint_distinct_pg_h(ptr %ptr, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: match_hint_distinct_pg_h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: match p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: st1h { z0.h }, p1, [x0]
+; CHECK-NEXT: ret
+;
+; DISABLE-HINT-LABEL: match_hint_distinct_pg_h:
+; DISABLE-HINT: // %bb.0:
+; DISABLE-HINT-NEXT: ptrue p0.h
+; DISABLE-HINT-NEXT: match p0.h, p0/z, z0.h, z1.h
+; DISABLE-HINT-NEXT: st1h { z0.h }, p0, [x0]
+; DISABLE-HINT-NEXT: ret
 %match = tail call <vscale x 8 x i1> @llvm.aarch64.sve.match.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+ call void @llvm.masked.store(<vscale x 8 x i16> %a, ptr %ptr, <vscale x 8 x i1> %match)
+ ret void
+}
+
+define void @cmp_hint_distinct_pg_b(ptr %ptr, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: cmp_hint_distinct_pg_b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: st1b { z0.b }, p1, [x0]
+; CHECK-NEXT: ret
+;
+; DISABLE-HINT-LABEL: cmp_hint_distinct_pg_b:
+; DISABLE-HINT: // %bb.0:
+; DISABLE-HINT-NEXT: ptrue p0.b
+; DISABLE-HINT-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b
+; DISABLE-HINT-NEXT: st1b { z0.b }, p0, [x0]
+; DISABLE-HINT-NEXT: ret
+ %cmp = icmp eq <vscale x 16 x i8> %a, %b
+ call void @llvm.masked.store(<vscale x 16 x i8> %a, ptr %ptr, <vscale x 16 x i1> %cmp)
+ ret void
+}
+
+define void @cmp_hint_distinct_pg_h(ptr %ptr, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: cmp_hint_distinct_pg_h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: st1h { z0.h }, p1, [x0]
+; CHECK-NEXT: ret
+;
+; DISABLE-HINT-LABEL: cmp_hint_distinct_pg_h:
+; DISABLE-HINT: // %bb.0:
+; DISABLE-HINT-NEXT: ptrue p0.h
+; DISABLE-HINT-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
+; DISABLE-HINT-NEXT: st1h { z0.h }, p0, [x0]
+; DISABLE-HINT-NEXT: ret
+ %cmp = icmp eq <vscale x 8 x i16> %a, %b
+ call void @llvm.masked.store(<vscale x 8 x i16> %a, ptr %ptr, <vscale x 8 x i1> %cmp)
+ ret void
+}
+
+define void @cmp_hint_distinct_pg_s(ptr %ptr, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: cmp_hint_distinct_pg_s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: st1w { z0.s }, p1, [x0]
+; CHECK-NEXT: ret
+;
+; DISABLE-HINT-LABEL: cmp_hint_distinct_pg_s:
+; DISABLE-HINT: // %bb.0:
+; DISABLE-HINT-NEXT: ptrue p0.s
+; DISABLE-HINT-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
+; DISABLE-HINT-NEXT: st1w { z0.s }, p0, [x0]
+; DISABLE-HINT-NEXT: ret
+ %cmp = icmp eq <vscale x 4 x i32> %a, %b
+ call void @llvm.masked.store(<vscale x 4 x i32> %a, ptr %ptr, <vscale x 4 x i1> %cmp)
+ ret void
+}
+
+define void @cmp_hint_distinct_pg_d(ptr %ptr, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: cmp_hint_distinct_pg_d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: st1d { z0.d }, p1, [x0]
+; CHECK-NEXT: ret
+;
+; DISABLE-HINT-LABEL: cmp_hint_distinct_pg_d:
+; DISABLE-HINT: // %bb.0:
+; DISABLE-HINT-NEXT: ptrue p0.d
+; DISABLE-HINT-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; DISABLE-HINT-NEXT: st1d { z0.d }, p0, [x0]
+; DISABLE-HINT-NEXT: ret
+ %cmp = icmp eq <vscale x 2 x i64> %a, %b
+ call void @llvm.masked.store(<vscale x 2 x i64> %a, ptr %ptr, <vscale x 2 x i1> %cmp)
+ ret void
+}
+
+;
+; Also test a case for the wide-element and immediate variants of CMP<>
+;
+
+define void @wide_cmp_hint_distinct_pg_b(ptr %ptr, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: wide_cmp_hint_distinct_pg_b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: st1b { z0.b }, p1, [x0]
+; CHECK-NEXT: ret
+;
+; DISABLE-HINT-LABEL: wide_cmp_hint_distinct_pg_b:
+; DISABLE-HINT: // %bb.0:
+; DISABLE-HINT-NEXT: cmpeq p0.b, p0/z, z0.b, z1.d
+; DISABLE-HINT-NEXT: st1b { z0.b }, p0, [x0]
+; DISABLE-HINT-NEXT: ret
+ %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ call void @llvm.masked.store(<vscale x 16 x i8> %a, ptr %ptr, <vscale x 16 x i1> %cmp)
+ ret void
+}
+
+define void @imm_cmp_hint_distinct_pg_b(ptr %ptr, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: imm_cmp_hint_distinct_pg_b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #12
+; CHECK-NEXT: st1b { z0.b }, p1, [x0]
+; CHECK-NEXT: ret
+;
+; DISABLE-HINT-LABEL: imm_cmp_hint_distinct_pg_b:
+; DISABLE-HINT: // %bb.0:
+; DISABLE-HINT-NEXT: ptrue p0.b
+; DISABLE-HINT-NEXT: cmpeq p0.b, p0/z, z0.b, #12
+; DISABLE-HINT-NEXT: st1b { z0.b }, p0, [x0]
+; DISABLE-HINT-NEXT: ret
+ %cmp = icmp eq <vscale x 16 x i8> %a, splat(i8 12)
+ call void @llvm.masked.store(<vscale x 16 x i8> %a, ptr %ptr, <vscale x 16 x i1> %cmp)
+ ret void
+}
+;
+; Some negative tests.
+;
+
+; Test that no different register is selected if that would introduce a mov.
+define <vscale x 16 x i1> @match_hint_dont_cause_extra_mov(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: match_hint_dont_cause_extra_mov:
+; CHECK: // %bb.0:
+; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ret
+;
+; DISABLE-HINT-LABEL: match_hint_dont_cause_extra_mov:
+; DISABLE-HINT: // %bb.0:
+; DISABLE-HINT-NEXT: match p0.b, p0/z, z0.b, z1.b
+; DISABLE-HINT-NEXT: ret
+ %match = tail call <vscale x 16 x i1> @llvm.aarch64.sve.match.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i1> %match
+}
+
+; This is a case that is not yet supported, because at the point of constructing
+; the hint for 'match' the all-true predicate is not yet allocated, so it can't
+; choose a distinct register yet. This case could be supported by looking through
+; uses of the ptrue, but that may not be worth it because the ptrue is likely to
+; have more than one use and this seems like an artificial case.
+define <vscale x 16 x i1> @unsupported_because_ptrue_not_yet_allocated(ptr %ptr, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: unsupported_because_ptrue_not_yet_allocated:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ret
+;
+; DISABLE-HINT-LABEL: unsupported_because_ptrue_not_yet_allocated:
+; DISABLE-HINT: // %bb.0:
+; DISABLE-HINT-NEXT: ptrue p0.b
+; DISABLE-HINT-NEXT: match p0.b, p0/z, z0.b, z1.b
+; DISABLE-HINT-NEXT: ret
+ %match = tail call <vscale x 16 x i1> @llvm.aarch64.sve.match.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i1> %match
+}
+
+attributes #0 = { "target-features"="+sve2" }
diff --git a/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll b/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll
index f097d874cd11b..7d02ff7cf8971 100644
--- a/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll
@@ -7,8 +7,8 @@ define <4 x i32> @masked_load_v4i32(ptr %a, <4 x i1> %mask) nounwind {
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: ldnt1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: ldnt1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%load = call <4 x i32> @llvm.masked.load.v4i32(ptr %a, i32 1, <4 x i1> %mask, <4 x i32> poison), !nontemporal !0
@@ -22,8 +22,8 @@ define void @masked_store_v4i32(<4 x i32> %x, ptr %a, <4 x i1> %mask) nounwind {
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT: stnt1w { z0.s }, p0, [x0]
+; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; CHECK-NEXT: stnt1w { z0.s }, p1, [x0]
; CHECK-NEXT: ret
call void @llvm.masked.store.v4i32.p0(<4 x i32> %x, ptr %a, i32 1, <4 x i1> %mask), !nontemporal !0
ret void
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
index 2b869c386993d..a74dfc0c20cf0 100644
--- a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
@@ -6,8 +6,8 @@ define <vscale x 4 x i32> @add_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: add z1.s, z1.s, z2.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
@@ -21,8 +21,8 @@ define <vscale x 8 x i16> @add_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: add z1.h, z1.h, z2.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
@@ -36,8 +36,8 @@ define <vscale x 16 x i8> @add_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: add z1.b, z1.b, z2.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
@@ -51,8 +51,8 @@ define <vscale x 4 x i32> @sub_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sub z1.s, z1.s, z2.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
@@ -66,8 +66,8 @@ define <vscale x 8 x i16> @sub_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: sub z1.h, z1.h, z2.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
@@ -81,8 +81,8 @@ define <vscale x 16 x i8> @sub_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: sub z1.b, z1.b, z2.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
@@ -96,8 +96,8 @@ define <vscale x 4 x i32> @mul_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mul z1.s, z1.s, z2.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
@@ -111,8 +111,8 @@ define <vscale x 8 x i16> @mul_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mul z1.h, z1.h, z2.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
@@ -126,8 +126,8 @@ define <vscale x 16 x i8> @mul_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mul z1.b, z1.b, z2.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
@@ -141,8 +141,8 @@ define <vscale x 4 x i32> @and_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: and z1.d, z1.d, z2.d
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
@@ -156,8 +156,8 @@ define <vscale x 8 x i16> @and_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z1.d, z1.d, z2.d
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
@@ -171,8 +171,8 @@ define <vscale x 16 x i8> @and_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: and z1.d, z1.d, z2.d
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
@@ -186,8 +186,8 @@ define <vscale x 4 x i32> @or_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: orr z1.d, z1.d, z2.d
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
@@ -201,8 +201,8 @@ define <vscale x 8 x i16> @or_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: orr z1.d, z1.d, z2.d
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
@@ -216,8 +216,8 @@ define <vscale x 16 x i8> @or_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: orr z1.d, z1.d, z2.d
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
@@ -231,8 +231,8 @@ define <vscale x 4 x i32> @xor_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: eor z1.d, z1.d, z2.d
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
@@ -246,8 +246,8 @@ define <vscale x 8 x i16> @xor_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: eor z1.d, z1.d, z2.d
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
@@ -261,8 +261,8 @@ define <vscale x 16 x i8> @xor_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: eor z1.d, z1.d, z2.d
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
@@ -276,8 +276,8 @@ define <vscale x 4 x i32> @andnot_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: bic z1.d, z1.d, z2.d
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
@@ -292,8 +292,8 @@ define <vscale x 8 x i16> @andnot_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: bic z1.d, z1.d, z2.d
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
@@ -308,8 +308,8 @@ define <vscale x 16 x i8> @andnot_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: bic z1.d, z1.d, z2.d
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
@@ -324,8 +324,8 @@ define <vscale x 4 x i32> @ornot_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32>
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: bsl2n z1.d, z1.d, z2.d, z1.d
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
@@ -340,8 +340,8 @@ define <vscale x 8 x i16> @ornot_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16>
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: bsl2n z1.d, z1.d, z2.d, z1.d
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
@@ -356,8 +356,8 @@ define <vscale x 16 x i8> @ornot_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8>
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: bsl2n z1.d, z1.d, z2.d, z1.d
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
@@ -718,8 +718,8 @@ define <vscale x 4 x i32> @sadd_sat_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sqadd z1.s, z1.s, z2.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
@@ -733,8 +733,8 @@ define <vscale x 8 x i16> @sadd_sat_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: sqadd z1.h, z1.h, z2.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
@@ -748,8 +748,8 @@ define <vscale x 16 x i8> @sadd_sat_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: sqadd z1.b, z1.b, z2.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
@@ -763,8 +763,8 @@ define <vscale x 4 x i32> @uadd_sat_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uqadd z1.s, z1.s, z2.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
@@ -778,8 +778,8 @@ define <vscale x 8 x i16> @uadd_sat_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: uqadd z1.h, z1.h, z2.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
@@ -793,8 +793,8 @@ define <vscale x 16 x i8> @uadd_sat_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: uqadd z1.b, z1.b, z2.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
@@ -808,8 +808,8 @@ define <vscale x 4 x i32> @ssub_sat_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sqsub z1.s, z1.s, z2.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
@@ -823,8 +823,8 @@ define <vscale x 8 x i16> @ssub_sat_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: sqsub z1.h, z1.h, z2.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
@@ -838,8 +838,8 @@ define <vscale x 16 x i8> @ssub_sat_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: sqsub z1.b, z1.b, z2.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
@@ -853,8 +853,8 @@ define <vscale x 4 x i32> @usub_sat_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uqsub z1.s, z1.s, z2.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
@@ -868,8 +868,8 @@ define <vscale x 8 x i16> @usub_sat_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: uqsub z1.h, z1.h, z2.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
@@ -883,8 +883,8 @@ define <vscale x 16 x i8> @usub_sat_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: uqsub z1.b, z1.b, z2.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
@@ -898,9 +898,9 @@ define <vscale x 4 x i32> @addqr_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32>
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, w0
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-NEXT: add z1.s, z1.s, z2.s
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
@@ -916,9 +916,9 @@ define <vscale x 8 x i16> @addqr_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16>
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z2.h, w0
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-NEXT: add z1.h, z1.h, z2.h
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
@@ -934,9 +934,9 @@ define <vscale x 16 x i8> @addqr_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8>
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z2.b, w0
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; CHECK-NEXT: add z1.b, z1.b, z2.b
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
@@ -952,9 +952,9 @@ define <vscale x 4 x i32> @subqr_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32>
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, w0
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-NEXT: sub z1.s, z1.s, z2.s
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
@@ -970,9 +970,9 @@ define <vscale x 8 x i16> @subqr_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16>
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z2.h, w0
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-NEXT: sub z1.h, z1.h, z2.h
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
@@ -988,9 +988,9 @@ define <vscale x 16 x i8> @subqr_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8>
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z2.b, w0
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; CHECK-NEXT: sub z1.b, z1.b, z2.b
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
@@ -1006,9 +1006,9 @@ define <vscale x 4 x i32> @mulqr_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32>
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z2.s, w0
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-NEXT: mul z1.s, z1.s, z2.s
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
@@ -1024,9 +1024,9 @@ define <vscale x 8 x i16> @mulqr_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16>
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z2.h, w0
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-NEXT: mul z1.h, z1.h, z2.h
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
@@ -1042,9 +1042,9 @@ define <vscale x 16 x i8> @mulqr_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8>
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z2.b, w0
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; CHECK-NEXT: mul z1.b, z1.b, z2.b
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
@@ -1174,9 +1174,9 @@ define <vscale x 4 x i32> @sadd_satqr_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z2.s, w0
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-NEXT: sqadd z1.s, z1.s, z2.s
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
@@ -1192,9 +1192,9 @@ define <vscale x 8 x i16> @sadd_satqr_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z2.h, w0
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-NEXT: sqadd z1.h, z1.h, z2.h
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
@@ -1210,9 +1210,9 @@ define <vscale x 16 x i8> @sadd_satqr_v16i8(<vscale x 16 x i8> %z, <vscale x 16
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z2.b, w0
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; CHECK-NEXT: sqadd z1.b, z1.b, z2.b
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
@@ -1228,9 +1228,9 @@ define <vscale x 4 x i32> @uadd_satqr_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z2.s, w0
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-NEXT: uqadd z1.s, z1.s, z2.s
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
@@ -1246,9 +1246,9 @@ define <vscale x 8 x i16> @uadd_satqr_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z2.h, w0
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-NEXT: uqadd z1.h, z1.h, z2.h
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
@@ -1264,9 +1264,9 @@ define <vscale x 16 x i8> @uadd_satqr_v16i8(<vscale x 16 x i8> %z, <vscale x 16
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z2.b, w0
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; CHECK-NEXT: uqadd z1.b, z1.b, z2.b
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
@@ -1282,9 +1282,9 @@ define <vscale x 4 x i32> @ssub_satqr_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z2.s, w0
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-NEXT: sqsub z1.s, z1.s, z2.s
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
@@ -1300,9 +1300,9 @@ define <vscale x 8 x i16> @ssub_satqr_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z2.h, w0
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-NEXT: sqsub z1.h, z1.h, z2.h
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
@@ -1318,9 +1318,9 @@ define <vscale x 16 x i8> @ssub_satqr_v16i8(<vscale x 16 x i8> %z, <vscale x 16
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z2.b, w0
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; CHECK-NEXT: sqsub z1.b, z1.b, z2.b
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
@@ -1336,9 +1336,9 @@ define <vscale x 4 x i32> @usub_satqr_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z2.s, w0
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-NEXT: uqsub z1.s, z1.s, z2.s
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
@@ -1354,9 +1354,9 @@ define <vscale x 8 x i16> @usub_satqr_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z2.h, w0
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-NEXT: uqsub z1.h, z1.h, z2.h
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
@@ -1372,9 +1372,9 @@ define <vscale x 16 x i8> @usub_satqr_v16i8(<vscale x 16 x i8> %z, <vscale x 16
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z2.b, w0
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; CHECK-NEXT: uqsub z1.b, z1.b, z2.b
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll
index 0c0762da5bba2..51a106e011d8b 100644
--- a/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll
@@ -5,8 +5,8 @@ define <vscale x 2 x i64> @add_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: add_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: add z0.d, p1/m, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -19,8 +19,8 @@ define <vscale x 4 x i32> @add_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: add_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: add z0.s, p1/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -33,8 +33,8 @@ define <vscale x 8 x i16> @add_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: add_nxv8i16_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: add z0.h, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -47,8 +47,8 @@ define <vscale x 16 x i8> @add_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: add_nxv16i8_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: add z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: add z0.b, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -61,8 +61,8 @@ define <vscale x 2 x i64> @sub_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: sub_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: sub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: sub z0.d, p1/m, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -75,8 +75,8 @@ define <vscale x 4 x i32> @sub_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: sub_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: sub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: sub z0.s, p1/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -89,8 +89,8 @@ define <vscale x 8 x i16> @sub_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: sub_nxv8i16_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: sub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: sub z0.h, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -103,8 +103,8 @@ define <vscale x 16 x i8> @sub_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: sub_nxv16i8_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: sub z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: sub z0.b, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -117,8 +117,8 @@ define <vscale x 2 x i64> @mul_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: mul_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: mul z0.d, p1/m, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -131,8 +131,8 @@ define <vscale x 4 x i32> @mul_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: mul_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: mul z0.s, p1/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -145,8 +145,8 @@ define <vscale x 8 x i16> @mul_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: mul_nxv8i16_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: mul z0.h, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -159,8 +159,8 @@ define <vscale x 16 x i8> @mul_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: mul_nxv16i8_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: mul z0.b, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -174,8 +174,8 @@ define <vscale x 2 x i64> @sdiv_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sdivr z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: mov z0.d, p0/m, z1.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: mov z0.d, p1/m, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -189,8 +189,8 @@ define <vscale x 4 x i32> @sdiv_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sdivr z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -210,9 +210,9 @@ define <vscale x 8 x i16> @sdiv_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i
; CHECK-NEXT: sunpklo z4.s, z0.h
; CHECK-NEXT: sdivr z1.s, p0/m, z1.s, z4.s
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
; CHECK-NEXT: uzp1 z1.h, z1.h, z3.h
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -243,10 +243,10 @@ define <vscale x 16 x i8> @sdiv_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
; CHECK-NEXT: sdivr z1.s, p0/m, z1.s, z4.s
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
; CHECK-NEXT: uzp1 z1.h, z1.h, z6.h
; CHECK-NEXT: uzp1 z1.b, z1.b, z3.b
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -260,8 +260,8 @@ define <vscale x 2 x i64> @udiv_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: udivr z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: mov z0.d, p0/m, z1.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: mov z0.d, p1/m, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -275,8 +275,8 @@ define <vscale x 4 x i32> @udiv_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: udivr z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -296,9 +296,9 @@ define <vscale x 8 x i16> @udiv_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i
; CHECK-NEXT: uunpklo z4.s, z0.h
; CHECK-NEXT: udivr z1.s, p0/m, z1.s, z4.s
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
; CHECK-NEXT: uzp1 z1.h, z1.h, z3.h
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -329,10 +329,10 @@ define <vscale x 16 x i8> @udiv_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
; CHECK-NEXT: udivr z1.s, p0/m, z1.s, z4.s
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
; CHECK-NEXT: uzp1 z1.h, z1.h, z6.h
; CHECK-NEXT: uzp1 z1.b, z1.b, z3.b
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -347,8 +347,8 @@ define <vscale x 2 x i64> @srem_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: sdiv z3.d, p0/m, z3.d, z1.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: mls z0.d, p0/m, z3.d, z1.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: mls z0.d, p1/m, z3.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -363,8 +363,8 @@ define <vscale x 4 x i32> @srem_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z1.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: mls z0.s, p0/m, z3.s, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: mls z0.s, p1/m, z3.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -384,9 +384,9 @@ define <vscale x 8 x i16> @srem_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i
; CHECK-NEXT: sunpklo z4.s, z1.h
; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
; CHECK-NEXT: uzp1 z2.h, z4.h, z3.h
-; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
+; CHECK-NEXT: mls z0.h, p1/m, z2.h, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -417,10 +417,10 @@ define <vscale x 16 x i8> @srem_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z6.s
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
; CHECK-NEXT: uzp1 z4.h, z4.h, z7.h
; CHECK-NEXT: uzp1 z2.b, z4.b, z3.b
-; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
+; CHECK-NEXT: mls z0.b, p1/m, z2.b, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -435,8 +435,8 @@ define <vscale x 2 x i64> @urem_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: udiv z3.d, p0/m, z3.d, z1.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: mls z0.d, p0/m, z3.d, z1.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: mls z0.d, p1/m, z3.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -451,8 +451,8 @@ define <vscale x 4 x i32> @urem_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z1.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: mls z0.s, p0/m, z3.s, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: mls z0.s, p1/m, z3.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -472,9 +472,9 @@ define <vscale x 8 x i16> @urem_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i
; CHECK-NEXT: uunpklo z4.s, z1.h
; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
; CHECK-NEXT: uzp1 z2.h, z4.h, z3.h
-; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
+; CHECK-NEXT: mls z0.h, p1/m, z2.h, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -505,10 +505,10 @@ define <vscale x 16 x i8> @urem_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z6.s
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
; CHECK-NEXT: uzp1 z4.h, z4.h, z7.h
; CHECK-NEXT: uzp1 z2.b, z4.b, z3.b
-; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
+; CHECK-NEXT: mls z0.b, p1/m, z2.b, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -521,8 +521,8 @@ define <vscale x 2 x i64> @and_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: and_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: and z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: and z0.d, p1/m, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -535,8 +535,8 @@ define <vscale x 4 x i32> @and_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: and_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: and z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: and z0.s, p1/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -549,8 +549,8 @@ define <vscale x 8 x i16> @and_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: and_nxv8i16_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: and z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: and z0.h, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -563,8 +563,8 @@ define <vscale x 16 x i8> @and_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: and_nxv16i8_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: and z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: and z0.b, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -577,8 +577,8 @@ define <vscale x 2 x i64> @or_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i64
; CHECK-LABEL: or_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: orr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: orr z0.d, p1/m, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -591,8 +591,8 @@ define <vscale x 4 x i32> @or_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i32
; CHECK-LABEL: or_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: orr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: orr z0.s, p1/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -605,8 +605,8 @@ define <vscale x 8 x i16> @or_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i16
; CHECK-LABEL: or_nxv8i16_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: orr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: orr z0.h, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -619,8 +619,8 @@ define <vscale x 16 x i8> @or_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x i8
; CHECK-LABEL: or_nxv16i8_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: orr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: orr z0.b, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -633,8 +633,8 @@ define <vscale x 2 x i64> @xor_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: xor_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: eor z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: eor z0.d, p1/m, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -647,8 +647,8 @@ define <vscale x 4 x i32> @xor_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: xor_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: eor z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: eor z0.s, p1/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -661,8 +661,8 @@ define <vscale x 8 x i16> @xor_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: xor_nxv8i16_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: eor z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: eor z0.h, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -675,8 +675,8 @@ define <vscale x 16 x i8> @xor_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: xor_nxv16i8_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: eor z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: eor z0.b, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -869,8 +869,8 @@ define <vscale x 2 x i64> @mla_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: mla_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z3.d, #0
-; CHECK-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z3.d, #0
+; CHECK-NEXT: mla z0.d, p1/m, z1.d, z2.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -884,8 +884,8 @@ define <vscale x 4 x i32> @mla_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: mla_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z3.s, #0
-; CHECK-NEXT: mla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z3.s, #0
+; CHECK-NEXT: mla z0.s, p1/m, z1.s, z2.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -899,8 +899,8 @@ define <vscale x 8 x i16> @mla_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: mla_nxv8i16_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z3.h, #0
-; CHECK-NEXT: mla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z3.h, #0
+; CHECK-NEXT: mla z0.h, p1/m, z1.h, z2.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -914,8 +914,8 @@ define <vscale x 16 x i8> @mla_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: mla_nxv16i8_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z3.b, #0
-; CHECK-NEXT: mla z0.b, p0/m, z1.b, z2.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z3.b, #0
+; CHECK-NEXT: mla z0.b, p1/m, z1.b, z2.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -929,8 +929,8 @@ define <vscale x 2 x i64> @mls_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: mls_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z3.d, #0
-; CHECK-NEXT: msb z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z3.d, #0
+; CHECK-NEXT: msb z0.d, p1/m, z1.d, z2.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -944,8 +944,8 @@ define <vscale x 4 x i32> @mls_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: mls_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z3.s, #0
-; CHECK-NEXT: msb z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z3.s, #0
+; CHECK-NEXT: msb z0.s, p1/m, z1.s, z2.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -959,8 +959,8 @@ define <vscale x 8 x i16> @mls_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: mls_nxv8i16_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z3.h, #0
-; CHECK-NEXT: msb z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z3.h, #0
+; CHECK-NEXT: msb z0.h, p1/m, z1.h, z2.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -974,8 +974,8 @@ define <vscale x 16 x i8> @mls_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: mls_nxv16i8_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z3.b, #0
-; CHECK-NEXT: msb z0.b, p0/m, z1.b, z2.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z3.b, #0
+; CHECK-NEXT: msb z0.b, p1/m, z1.b, z2.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -1442,8 +1442,8 @@ define <vscale x 2 x i64> @add_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: add_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: add z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: add z1.d, p1/m, z1.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1457,8 +1457,8 @@ define <vscale x 4 x i32> @add_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: add_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: add z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: add z1.s, p1/m, z1.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1472,8 +1472,8 @@ define <vscale x 8 x i16> @add_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: add_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: add z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: add z1.h, p1/m, z1.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1487,8 +1487,8 @@ define <vscale x 16 x i8> @add_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: add_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: add z1.b, p0/m, z1.b, z0.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: add z1.b, p1/m, z1.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1502,8 +1502,8 @@ define <vscale x 2 x i64> @sub_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: sub_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: subr z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: subr z1.d, p1/m, z1.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1517,8 +1517,8 @@ define <vscale x 4 x i32> @sub_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: sub_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: subr z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: subr z1.s, p1/m, z1.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1532,8 +1532,8 @@ define <vscale x 8 x i16> @sub_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: sub_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: subr z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: subr z1.h, p1/m, z1.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1547,8 +1547,8 @@ define <vscale x 16 x i8> @sub_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: sub_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: subr z1.b, p0/m, z1.b, z0.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: subr z1.b, p1/m, z1.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1562,8 +1562,8 @@ define <vscale x 2 x i64> @mul_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: mul_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: mul z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: mul z1.d, p1/m, z1.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1577,8 +1577,8 @@ define <vscale x 4 x i32> @mul_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: mul_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: mul z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: mul z1.s, p1/m, z1.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1592,8 +1592,8 @@ define <vscale x 8 x i16> @mul_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: mul_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: mul z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: mul z1.h, p1/m, z1.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1607,8 +1607,8 @@ define <vscale x 16 x i8> @mul_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: mul_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: mul z1.b, p0/m, z1.b, z0.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: mul z1.b, p1/m, z1.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1623,8 +1623,8 @@ define <vscale x 2 x i64> @sdiv_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -1638,8 +1638,8 @@ define <vscale x 4 x i32> @sdiv_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -1659,9 +1659,9 @@ define <vscale x 8 x i16> @sdiv_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i
; CHECK-NEXT: sunpklo z4.s, z1.h
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z4.s
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
; CHECK-NEXT: uzp1 z0.h, z0.h, z3.h
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -1692,10 +1692,10 @@ define <vscale x 16 x i8> @sdiv_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z4.s
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
; CHECK-NEXT: uzp1 z0.h, z0.h, z6.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z3.b
-; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -1709,8 +1709,8 @@ define <vscale x 2 x i64> @udiv_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -1724,8 +1724,8 @@ define <vscale x 4 x i32> @udiv_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -1745,9 +1745,9 @@ define <vscale x 8 x i16> @udiv_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i
; CHECK-NEXT: uunpklo z4.s, z1.h
; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z4.s
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
; CHECK-NEXT: uzp1 z0.h, z0.h, z3.h
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -1778,10 +1778,10 @@ define <vscale x 16 x i8> @udiv_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z4.s
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
; CHECK-NEXT: uzp1 z0.h, z0.h, z6.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z3.b
-; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -1796,8 +1796,8 @@ define <vscale x 2 x i64> @srem_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: sdiv z3.d, p0/m, z3.d, z1.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: msb z1.d, p0/m, z3.d, z0.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: msb z1.d, p1/m, z3.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1813,8 +1813,8 @@ define <vscale x 4 x i32> @srem_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z1.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: msb z1.s, p0/m, z3.s, z0.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: msb z1.s, p1/m, z3.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1835,9 +1835,9 @@ define <vscale x 8 x i16> @srem_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i
; CHECK-NEXT: sunpklo z4.s, z1.h
; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
; CHECK-NEXT: uzp1 z2.h, z4.h, z3.h
-; CHECK-NEXT: msb z1.h, p0/m, z2.h, z0.h
+; CHECK-NEXT: msb z1.h, p1/m, z2.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1869,10 +1869,10 @@ define <vscale x 16 x i8> @srem_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z6.s
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
; CHECK-NEXT: uzp1 z4.h, z4.h, z7.h
; CHECK-NEXT: uzp1 z2.b, z4.b, z3.b
-; CHECK-NEXT: msb z1.b, p0/m, z2.b, z0.b
+; CHECK-NEXT: msb z1.b, p1/m, z2.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1888,8 +1888,8 @@ define <vscale x 2 x i64> @urem_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: udiv z3.d, p0/m, z3.d, z1.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: msb z1.d, p0/m, z3.d, z0.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: msb z1.d, p1/m, z3.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1905,8 +1905,8 @@ define <vscale x 4 x i32> @urem_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z1.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: msb z1.s, p0/m, z3.s, z0.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: msb z1.s, p1/m, z3.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1927,9 +1927,9 @@ define <vscale x 8 x i16> @urem_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i
; CHECK-NEXT: uunpklo z4.s, z1.h
; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
; CHECK-NEXT: uzp1 z2.h, z4.h, z3.h
-; CHECK-NEXT: msb z1.h, p0/m, z2.h, z0.h
+; CHECK-NEXT: msb z1.h, p1/m, z2.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1961,10 +1961,10 @@ define <vscale x 16 x i8> @urem_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z6.s
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
; CHECK-NEXT: uzp1 z4.h, z4.h, z7.h
; CHECK-NEXT: uzp1 z2.b, z4.b, z3.b
-; CHECK-NEXT: msb z1.b, p0/m, z2.b, z0.b
+; CHECK-NEXT: msb z1.b, p1/m, z2.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1978,8 +1978,8 @@ define <vscale x 2 x i64> @and_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: and_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: and z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: and z1.d, p1/m, z1.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1993,8 +1993,8 @@ define <vscale x 4 x i32> @and_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: and_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: and z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: and z1.s, p1/m, z1.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2008,8 +2008,8 @@ define <vscale x 8 x i16> @and_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: and_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: and z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: and z1.h, p1/m, z1.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2023,8 +2023,8 @@ define <vscale x 16 x i8> @and_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: and_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: and z1.b, p0/m, z1.b, z0.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: and z1.b, p1/m, z1.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2038,8 +2038,8 @@ define <vscale x 2 x i64> @or_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i64
; CHECK-LABEL: or_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: orr z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: orr z1.d, p1/m, z1.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2053,8 +2053,8 @@ define <vscale x 4 x i32> @or_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i32
; CHECK-LABEL: or_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: orr z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: orr z1.s, p1/m, z1.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2068,8 +2068,8 @@ define <vscale x 8 x i16> @or_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i16
; CHECK-LABEL: or_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: orr z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: orr z1.h, p1/m, z1.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2083,8 +2083,8 @@ define <vscale x 16 x i8> @or_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i8
; CHECK-LABEL: or_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: orr z1.b, p0/m, z1.b, z0.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: orr z1.b, p1/m, z1.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2098,8 +2098,8 @@ define <vscale x 2 x i64> @xor_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: xor_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: eor z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: eor z1.d, p1/m, z1.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2113,8 +2113,8 @@ define <vscale x 4 x i32> @xor_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: xor_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: eor z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: eor z1.s, p1/m, z1.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2128,8 +2128,8 @@ define <vscale x 8 x i16> @xor_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: xor_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: eor z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: eor z1.h, p1/m, z1.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2143,8 +2143,8 @@ define <vscale x 16 x i8> @xor_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: xor_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: eor z1.b, p0/m, z1.b, z0.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: eor z1.b, p1/m, z1.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2338,8 +2338,8 @@ define <vscale x 2 x i64> @mla_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: mla_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z3.d, #0
-; CHECK-NEXT: mad z1.d, p0/m, z2.d, z0.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z3.d, #0
+; CHECK-NEXT: mad z1.d, p1/m, z2.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2354,8 +2354,8 @@ define <vscale x 4 x i32> @mla_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: mla_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z3.s, #0
-; CHECK-NEXT: mad z1.s, p0/m, z2.s, z0.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z3.s, #0
+; CHECK-NEXT: mad z1.s, p1/m, z2.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2370,8 +2370,8 @@ define <vscale x 8 x i16> @mla_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: mla_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z3.h, #0
-; CHECK-NEXT: mad z1.h, p0/m, z2.h, z0.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z3.h, #0
+; CHECK-NEXT: mad z1.h, p1/m, z2.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2386,8 +2386,8 @@ define <vscale x 16 x i8> @mla_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: mla_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z3.b, #0
-; CHECK-NEXT: mad z1.b, p0/m, z2.b, z0.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z3.b, #0
+; CHECK-NEXT: mad z1.b, p1/m, z2.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2402,8 +2402,8 @@ define <vscale x 2 x i64> @mls_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: mls_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z3.d, #0
-; CHECK-NEXT: msb z1.d, p0/m, z0.d, z2.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z3.d, #0
+; CHECK-NEXT: msb z1.d, p1/m, z0.d, z2.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2418,8 +2418,8 @@ define <vscale x 4 x i32> @mls_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: mls_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z3.s, #0
-; CHECK-NEXT: msb z1.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z3.s, #0
+; CHECK-NEXT: msb z1.s, p1/m, z0.s, z2.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2434,8 +2434,8 @@ define <vscale x 8 x i16> @mls_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: mls_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z3.h, #0
-; CHECK-NEXT: msb z1.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z3.h, #0
+; CHECK-NEXT: msb z1.h, p1/m, z0.h, z2.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2450,8 +2450,8 @@ define <vscale x 16 x i8> @mls_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: mls_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z3.b, #0
-; CHECK-NEXT: msb z1.b, p0/m, z0.b, z2.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z3.b, #0
+; CHECK-NEXT: msb z1.b, p1/m, z0.b, z2.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2921,9 +2921,9 @@ define <vscale x 4 x i32> @mul_use_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mul z1.s, z0.s, z1.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
; CHECK-NEXT: str z1, [x0]
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll
index 58d6149b94d3a..eae255ce8e4b6 100644
--- a/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll
@@ -5,8 +5,8 @@ define <vscale x 2 x i64> @add_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: add_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: add z0.d, p1/m, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -19,8 +19,8 @@ define <vscale x 4 x i32> @add_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: add_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: add z0.s, p1/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -33,8 +33,8 @@ define <vscale x 8 x i16> @add_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: add_nxv8i16_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: add z0.h, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -47,8 +47,8 @@ define <vscale x 16 x i8> @add_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: add_nxv16i8_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: add z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: add z0.b, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -61,8 +61,8 @@ define <vscale x 2 x i64> @sub_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: sub_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: sub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: sub z0.d, p1/m, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -75,8 +75,8 @@ define <vscale x 4 x i32> @sub_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: sub_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: sub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: sub z0.s, p1/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -89,8 +89,8 @@ define <vscale x 8 x i16> @sub_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: sub_nxv8i16_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: sub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: sub z0.h, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -103,8 +103,8 @@ define <vscale x 16 x i8> @sub_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: sub_nxv16i8_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: sub z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: sub z0.b, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -117,8 +117,8 @@ define <vscale x 2 x i64> @mul_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: mul_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: mul z0.d, p1/m, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -131,8 +131,8 @@ define <vscale x 4 x i32> @mul_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: mul_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: mul z0.s, p1/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -145,8 +145,8 @@ define <vscale x 8 x i16> @mul_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: mul_nxv8i16_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: mul z0.h, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -159,8 +159,8 @@ define <vscale x 16 x i8> @mul_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: mul_nxv16i8_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: mul z0.b, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -173,8 +173,8 @@ define <vscale x 2 x i64> @and_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: and_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: and z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: and z0.d, p1/m, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -187,8 +187,8 @@ define <vscale x 4 x i32> @and_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: and_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: and z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: and z0.s, p1/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -201,8 +201,8 @@ define <vscale x 8 x i16> @and_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: and_nxv8i16_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: and z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: and z0.h, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -215,8 +215,8 @@ define <vscale x 16 x i8> @and_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: and_nxv16i8_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: and z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: and z0.b, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -229,8 +229,8 @@ define <vscale x 2 x i64> @or_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i64
; CHECK-LABEL: or_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: orr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: orr z0.d, p1/m, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -243,8 +243,8 @@ define <vscale x 4 x i32> @or_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i32
; CHECK-LABEL: or_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: orr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: orr z0.s, p1/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -257,8 +257,8 @@ define <vscale x 8 x i16> @or_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i16
; CHECK-LABEL: or_nxv8i16_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: orr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: orr z0.h, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -271,8 +271,8 @@ define <vscale x 16 x i8> @or_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x i8
; CHECK-LABEL: or_nxv16i8_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: orr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: orr z0.b, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -285,8 +285,8 @@ define <vscale x 2 x i64> @xor_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: xor_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: eor z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: eor z0.d, p1/m, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -299,8 +299,8 @@ define <vscale x 4 x i32> @xor_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: xor_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: eor z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: eor z0.s, p1/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -313,8 +313,8 @@ define <vscale x 8 x i16> @xor_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: xor_nxv8i16_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: eor z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: eor z0.h, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -327,8 +327,8 @@ define <vscale x 16 x i8> @xor_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: xor_nxv16i8_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: eor z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: eor z0.b, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -521,8 +521,8 @@ define <vscale x 2 x i64> @mla_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: mla_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z3.d, #0
-; CHECK-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z3.d, #0
+; CHECK-NEXT: mla z0.d, p1/m, z1.d, z2.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -536,8 +536,8 @@ define <vscale x 4 x i32> @mla_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: mla_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z3.s, #0
-; CHECK-NEXT: mla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z3.s, #0
+; CHECK-NEXT: mla z0.s, p1/m, z1.s, z2.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -551,8 +551,8 @@ define <vscale x 8 x i16> @mla_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: mla_nxv8i16_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z3.h, #0
-; CHECK-NEXT: mla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z3.h, #0
+; CHECK-NEXT: mla z0.h, p1/m, z1.h, z2.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -566,8 +566,8 @@ define <vscale x 16 x i8> @mla_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: mla_nxv16i8_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z3.b, #0
-; CHECK-NEXT: mla z0.b, p0/m, z1.b, z2.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z3.b, #0
+; CHECK-NEXT: mla z0.b, p1/m, z1.b, z2.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -581,8 +581,8 @@ define <vscale x 2 x i64> @mls_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: mls_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z3.d, #0
-; CHECK-NEXT: msb z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z3.d, #0
+; CHECK-NEXT: msb z0.d, p1/m, z1.d, z2.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -596,8 +596,8 @@ define <vscale x 4 x i32> @mls_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: mls_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z3.s, #0
-; CHECK-NEXT: msb z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z3.s, #0
+; CHECK-NEXT: msb z0.s, p1/m, z1.s, z2.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -611,8 +611,8 @@ define <vscale x 8 x i16> @mls_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: mls_nxv8i16_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z3.h, #0
-; CHECK-NEXT: msb z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z3.h, #0
+; CHECK-NEXT: msb z0.h, p1/m, z1.h, z2.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -626,8 +626,8 @@ define <vscale x 16 x i8> @mls_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: mls_nxv16i8_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z3.b, #0
-; CHECK-NEXT: msb z0.b, p0/m, z1.b, z2.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z3.b, #0
+; CHECK-NEXT: msb z0.b, p1/m, z1.b, z2.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -869,8 +869,8 @@ define <vscale x 2 x i64> @add_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: add_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: add z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: add z1.d, p1/m, z1.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -884,8 +884,8 @@ define <vscale x 4 x i32> @add_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: add_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: add z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: add z1.s, p1/m, z1.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -899,8 +899,8 @@ define <vscale x 8 x i16> @add_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: add_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: add z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: add z1.h, p1/m, z1.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -914,8 +914,8 @@ define <vscale x 16 x i8> @add_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: add_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: add z1.b, p0/m, z1.b, z0.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: add z1.b, p1/m, z1.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -929,8 +929,8 @@ define <vscale x 2 x i64> @sub_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: sub_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: subr z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: subr z1.d, p1/m, z1.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -944,8 +944,8 @@ define <vscale x 4 x i32> @sub_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: sub_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: subr z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: subr z1.s, p1/m, z1.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -959,8 +959,8 @@ define <vscale x 8 x i16> @sub_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: sub_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: subr z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: subr z1.h, p1/m, z1.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -974,8 +974,8 @@ define <vscale x 16 x i8> @sub_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: sub_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: subr z1.b, p0/m, z1.b, z0.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: subr z1.b, p1/m, z1.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -989,8 +989,8 @@ define <vscale x 2 x i64> @mul_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: mul_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: mul z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: mul z1.d, p1/m, z1.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1004,8 +1004,8 @@ define <vscale x 4 x i32> @mul_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: mul_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: mul z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: mul z1.s, p1/m, z1.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1019,8 +1019,8 @@ define <vscale x 8 x i16> @mul_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: mul_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: mul z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: mul z1.h, p1/m, z1.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1034,8 +1034,8 @@ define <vscale x 16 x i8> @mul_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: mul_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: mul z1.b, p0/m, z1.b, z0.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: mul z1.b, p1/m, z1.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1049,8 +1049,8 @@ define <vscale x 2 x i64> @and_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: and_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: and z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: and z1.d, p1/m, z1.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1064,8 +1064,8 @@ define <vscale x 4 x i32> @and_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: and_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: and z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: and z1.s, p1/m, z1.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1079,8 +1079,8 @@ define <vscale x 8 x i16> @and_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: and_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: and z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: and z1.h, p1/m, z1.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1094,8 +1094,8 @@ define <vscale x 16 x i8> @and_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: and_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: and z1.b, p0/m, z1.b, z0.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: and z1.b, p1/m, z1.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1109,8 +1109,8 @@ define <vscale x 2 x i64> @or_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i64
; CHECK-LABEL: or_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: orr z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: orr z1.d, p1/m, z1.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1124,8 +1124,8 @@ define <vscale x 4 x i32> @or_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i32
; CHECK-LABEL: or_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: orr z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: orr z1.s, p1/m, z1.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1139,8 +1139,8 @@ define <vscale x 8 x i16> @or_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i16
; CHECK-LABEL: or_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: orr z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: orr z1.h, p1/m, z1.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1154,8 +1154,8 @@ define <vscale x 16 x i8> @or_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i8
; CHECK-LABEL: or_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: orr z1.b, p0/m, z1.b, z0.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: orr z1.b, p1/m, z1.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1169,8 +1169,8 @@ define <vscale x 2 x i64> @xor_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: xor_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: eor z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: eor z1.d, p1/m, z1.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1184,8 +1184,8 @@ define <vscale x 4 x i32> @xor_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: xor_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: eor z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: eor z1.s, p1/m, z1.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1199,8 +1199,8 @@ define <vscale x 8 x i16> @xor_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: xor_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: eor z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: eor z1.h, p1/m, z1.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1214,8 +1214,8 @@ define <vscale x 16 x i8> @xor_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: xor_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: eor z1.b, p0/m, z1.b, z0.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: eor z1.b, p1/m, z1.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1409,8 +1409,8 @@ define <vscale x 2 x i64> @mla_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: mla_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z3.d, #0
-; CHECK-NEXT: mad z1.d, p0/m, z2.d, z0.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z3.d, #0
+; CHECK-NEXT: mad z1.d, p1/m, z2.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1425,8 +1425,8 @@ define <vscale x 4 x i32> @mla_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: mla_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z3.s, #0
-; CHECK-NEXT: mad z1.s, p0/m, z2.s, z0.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z3.s, #0
+; CHECK-NEXT: mad z1.s, p1/m, z2.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1441,8 +1441,8 @@ define <vscale x 8 x i16> @mla_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: mla_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z3.h, #0
-; CHECK-NEXT: mad z1.h, p0/m, z2.h, z0.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z3.h, #0
+; CHECK-NEXT: mad z1.h, p1/m, z2.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1457,8 +1457,8 @@ define <vscale x 16 x i8> @mla_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: mla_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z3.b, #0
-; CHECK-NEXT: mad z1.b, p0/m, z2.b, z0.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z3.b, #0
+; CHECK-NEXT: mad z1.b, p1/m, z2.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1473,8 +1473,8 @@ define <vscale x 2 x i64> @mls_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: mls_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z3.d, #0
-; CHECK-NEXT: msb z1.d, p0/m, z0.d, z2.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z3.d, #0
+; CHECK-NEXT: msb z1.d, p1/m, z0.d, z2.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1489,8 +1489,8 @@ define <vscale x 4 x i32> @mls_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: mls_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z3.s, #0
-; CHECK-NEXT: msb z1.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z3.s, #0
+; CHECK-NEXT: msb z1.s, p1/m, z0.s, z2.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1505,8 +1505,8 @@ define <vscale x 8 x i16> @mls_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: mls_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z3.h, #0
-; CHECK-NEXT: msb z1.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: cmpgt p1.h, p0/z, z3.h, #0
+; CHECK-NEXT: msb z1.h, p1/m, z0.h, z2.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1521,8 +1521,8 @@ define <vscale x 16 x i8> @mls_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: mls_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z3.b, #0
-; CHECK-NEXT: msb z1.b, p0/m, z0.b, z2.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z3.b, #0
+; CHECK-NEXT: msb z1.b, p1/m, z0.b, z2.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1811,9 +1811,9 @@ define <vscale x 4 x i32> @mul_nxv4i32_multiuse_x(<vscale x 4 x i32> %x, <vscale
; CHECK-LABEL: mul_nxv4i32_multiuse_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
+; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
; CHECK-NEXT: mov z2.s, #1 // =0x1
-; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s
+; CHECK-NEXT: sel z1.s, p1, z1.s, z2.s
; CHECK-NEXT: mul z0.s, z1.s, z0.s
; CHECK-NEXT: str z1, [x0]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
index d0ea1ddb252bb..d31457b031443 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
@@ -8,7 +8,7 @@
define i32 @cmpeq_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmpeq_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -20,7 +20,7 @@ define i32 @cmpeq_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
define i32 @cmpeq_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmpeq_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -36,7 +36,7 @@ define i32 @cmpeq_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
define i32 @cmpeq_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmpeq_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
@@ -53,7 +53,7 @@ define i32 @cmpeq_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
define i32 @cmpeq_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpeq_wide_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
@@ -65,7 +65,7 @@ define i32 @cmpeq_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
define i32 @cmpeq_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpeq_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -79,7 +79,7 @@ define i32 @cmpeq_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
define i32 @cmpeq_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpeq_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
index 5dae689b82a72..e2a0a0e61b0e2 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
@@ -8,7 +8,7 @@
define i32 @cmpge_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmpge_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmpge p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -20,7 +20,7 @@ define i32 @cmpge_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
define i32 @cmpge_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmpge_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -36,7 +36,7 @@ define i32 @cmpge_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
define i32 @cmpge_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmpge_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpge p1.b, p0/z, z0.b, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
@@ -53,7 +53,7 @@ define i32 @cmpge_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
define i32 @cmpge_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpge_wide_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cmpge p1.b, p0/z, z0.b, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
@@ -65,7 +65,7 @@ define i32 @cmpge_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
define i32 @cmpge_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpge_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cmpge p1.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -79,7 +79,7 @@ define i32 @cmpge_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
define i32 @cmpge_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpge_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
index c2dc452ad88f6..1ea7051e691e2 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
@@ -8,7 +8,7 @@
define i32 @cmpgt_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmpgt_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpgt p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmpgt p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -20,7 +20,7 @@ define i32 @cmpgt_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
define i32 @cmpgt_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmpgt_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -36,7 +36,7 @@ define i32 @cmpgt_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
define i32 @cmpgt_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmpgt_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpgt p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpgt p1.b, p0/z, z0.b, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
@@ -53,7 +53,7 @@ define i32 @cmpgt_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
define i32 @cmpgt_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpgt_wide_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpgt p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cmpgt p1.b, p0/z, z0.b, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
@@ -65,7 +65,7 @@ define i32 @cmpgt_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
define i32 @cmpgt_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpgt_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpgt p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cmpgt p1.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -79,7 +79,7 @@ define i32 @cmpgt_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
define i32 @cmpgt_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpgt_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
index e4b45921ece68..5624d85b2b278 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
@@ -8,7 +8,7 @@
define i32 @cmphi_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmphi_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmphi p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmphi p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -20,7 +20,7 @@ define i32 @cmphi_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
define i32 @cmphi_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmphi_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmphi p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -37,7 +37,7 @@ define i32 @cmphi_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
define i32 @cmphi_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmphi_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmphi p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmphi p1.b, p0/z, z0.b, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
@@ -54,7 +54,7 @@ define i32 @cmphi_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
define i32 @cmphi_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmphi_wide_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmphi p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cmphi p1.b, p0/z, z0.b, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
@@ -66,7 +66,7 @@ define i32 @cmphi_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
define i32 @cmphi_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmphi_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmphi p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cmphi p1.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -80,7 +80,7 @@ define i32 @cmphi_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
define i32 @cmphi_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmphi_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cmphi p1.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
index 42906f6e9703d..877c04f7fcd76 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
@@ -8,7 +8,7 @@
define i32 @cmphs_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmphs_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmphs p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmphs p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -20,7 +20,7 @@ define i32 @cmphs_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
define i32 @cmphs_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmphs_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmphs p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmphs p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -36,7 +36,7 @@ define i32 @cmphs_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
define i32 @cmphs_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmphs_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmphs p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmphs p1.b, p0/z, z0.b, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
@@ -53,7 +53,7 @@ define i32 @cmphs_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
define i32 @cmphs_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmphs_wide_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmphs p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cmphs p1.b, p0/z, z0.b, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
@@ -65,7 +65,7 @@ define i32 @cmphs_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
define i32 @cmphs_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmphs_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmphs p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cmphs p1.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -79,7 +79,7 @@ define i32 @cmphs_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
define i32 @cmphs_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmphs_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmphs p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cmphs p1.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
index 8bd38d7bc44df..f0f58f3ed4154 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
@@ -10,7 +10,7 @@ target triple = "aarch64-unknown-linux-gnu"
define i32 @cmple_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmple_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmple p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmple p1.b, p0/z, z0.b, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)
@@ -27,7 +27,7 @@ define i32 @cmple_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
define i32 @cmple_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmple_wide_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmple p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cmple p1.b, p0/z, z0.b, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
@@ -39,7 +39,7 @@ define i32 @cmple_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
define i32 @cmple_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmple_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmple p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cmple p1.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -53,7 +53,7 @@ define i32 @cmple_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
define i32 @cmple_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmple_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmple p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cmple p1.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
@@ -74,7 +74,7 @@ define i32 @cmple_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <v
define i1 @cmp8_ptest_first_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_first_px:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmpge p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, mi
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -88,7 +88,7 @@ define i1 @cmp8_ptest_first_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
define i1 @cmp8_ptest_last_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_last_px:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmpge p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -102,7 +102,7 @@ define i1 @cmp8_ptest_last_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vs
define i1 @cmp8_ptest_any_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_px:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmpge p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -117,7 +117,7 @@ define i1 @cmp8_ptest_any_px_bad_ptrue(<vscale x 16 x i8> %a, <vscale x 16 x i8>
; CHECK-LABEL: cmp8_ptest_any_px_bad_ptrue:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmpge p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -167,7 +167,7 @@ define i1 @cmp32_ptest_last_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <v
define i1 @cmp32_ptest_any_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_px:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
@@ -184,7 +184,7 @@ define i1 @cmp32_ptest_any_px_bad_ptrue(<vscale x 4 x i32> %a, <vscale x 4 x i32
; CHECK-LABEL: cmp32_ptest_any_px_bad_ptrue:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
@@ -225,7 +225,7 @@ define i1 @cmp32_ptest_any_px_bad_ptrue(<vscale x 4 x i32> %a, <vscale x 4 x i32
define i1 @cmp8_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_first_xx:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmpge p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -240,7 +240,7 @@ define i1 @cmp8_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
define i1 @cmp8_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_last_xx:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmpge p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -254,7 +254,7 @@ define i1 @cmp8_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vs
define i1 @cmp8_ptest_any_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_xx:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmpge p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -269,7 +269,7 @@ define i1 @cmp8_ptest_any_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vsc
define i1 @cmp32_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_first_xx:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
@@ -285,7 +285,7 @@ define i1 @cmp32_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <
define i1 @cmp32_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_last_xx:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
@@ -300,7 +300,7 @@ define i1 @cmp32_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <v
define i1 @cmp32_ptest_any_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_xx:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
@@ -320,9 +320,9 @@ define i1 @cmp32_ptest_any_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vs
define i1 @cmp8_ptest_first_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_first_ax:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: cmpge p1.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: cset w0, mi
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -338,9 +338,9 @@ define i1 @cmp8_ptest_first_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
define i1 @cmp8_ptest_last_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_last_ax:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: cmpge p1.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -355,7 +355,7 @@ define i1 @cmp8_ptest_last_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vs
define i1 @cmp8_ptest_any_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_ax:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmpge p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -371,9 +371,9 @@ define i1 @cmp8_ptest_any_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vsc
define i1 @cmp32_ptest_first_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_first_ax:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: cset w0, mi
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
@@ -390,9 +390,9 @@ define i1 @cmp32_ptest_first_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <
define i1 @cmp32_ptest_last_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_last_ax:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
@@ -408,7 +408,7 @@ define i1 @cmp32_ptest_last_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <v
define i1 @cmp32_ptest_any_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_ax:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
@@ -429,7 +429,7 @@ define i1 @cmp8_ptest_first_aa(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_first_aa:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmpge p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, mi
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
@@ -445,7 +445,7 @@ define i1 @cmp8_ptest_last_aa(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_last_aa:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmpge p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
@@ -461,7 +461,7 @@ define i1 @cmp8_ptest_any_aa(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_aa:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmpge p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
@@ -477,7 +477,7 @@ define i1 @cmp32_ptest_first_aa(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_first_aa:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, mi
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -493,7 +493,7 @@ define i1 @cmp32_ptest_last_aa(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_last_aa:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -509,7 +509,7 @@ define i1 @cmp32_ptest_any_aa(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_aa:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll
index 3379e8637193a..c2421bb2f8463 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll
@@ -8,7 +8,7 @@
define i32 @cmplo_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmplo_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmplo p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmplo p1.b, p0/z, z0.b, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)
@@ -25,7 +25,7 @@ define i32 @cmplo_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
define i32 @cmplo_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmplo_wide_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmplo p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cmplo p1.b, p0/z, z0.b, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmplo.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
@@ -37,7 +37,7 @@ define i32 @cmplo_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
define i32 @cmplo_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmplo_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmplo p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cmplo p1.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -51,7 +51,7 @@ define i32 @cmplo_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
define i32 @cmplo_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmplo_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmplo p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cmplo p1.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll
index dbbabe9e7d0fa..998097ba494cb 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll
@@ -8,7 +8,7 @@
define i32 @cmpls_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmpls_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpls p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpls p1.b, p0/z, z0.b, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)
@@ -25,7 +25,7 @@ define i32 @cmpls_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
define i32 @cmpls_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpls_wide_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpls p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cmpls p1.b, p0/z, z0.b, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpls.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
@@ -37,7 +37,7 @@ define i32 @cmpls_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
define i32 @cmpls_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpls_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpls p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cmpls p1.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -51,7 +51,7 @@ define i32 @cmpls_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
define i32 @cmpls_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpls_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpls p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cmpls p1.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll
index cf15a3572070b..baf7ab5548215 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll
@@ -8,7 +8,7 @@
define i32 @cmplt_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmplt_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmplt p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmplt p1.b, p0/z, z0.b, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)
@@ -25,7 +25,7 @@ define i32 @cmplt_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
define i32 @cmplt_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmplt_wide_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmplt p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cmplt p1.b, p0/z, z0.b, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmplt.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
@@ -37,7 +37,7 @@ define i32 @cmplt_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
define i32 @cmplt_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmplt_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmplt p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cmplt p1.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -51,7 +51,7 @@ define i32 @cmplt_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
define i32 @cmplt_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmplt_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmplt p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cmplt p1.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
index ba4bd4b497d2c..3d47c28774169 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
@@ -8,7 +8,7 @@
define i32 @cmpne_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmpne_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -20,7 +20,7 @@ define i32 @cmpne_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
define i32 @cmpne_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmpne_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -36,7 +36,7 @@ define i32 @cmpne_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
define i32 @cmpne_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmpne_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
@@ -53,7 +53,7 @@ define i32 @cmpne_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
define i32 @cmpne_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpne_wide_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
@@ -65,7 +65,7 @@ define i32 @cmpne_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
define i32 @cmpne_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpne_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
@@ -79,7 +79,7 @@ define i32 @cmpne_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <v
define i32 @cmpne_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpne_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-match.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-match.ll
index edd87e6a5a5ba..83aa3eafd1fc9 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-match.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-match.ll
@@ -8,7 +8,7 @@
define i32 @match_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: match_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: match p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.match.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -20,7 +20,7 @@ define i32 @match_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
define i32 @match_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: match_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: match p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.match.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
diff --git a/llvm/test/CodeGen/AArch64/sve-punpklo-combine.ll b/llvm/test/CodeGen/AArch64/sve-punpklo-combine.ll
index 970f55225daf5..ee51bf1befab0 100644
--- a/llvm/test/CodeGen/AArch64/sve-punpklo-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-punpklo-combine.ll
@@ -6,8 +6,8 @@ define <vscale x 8 x i1> @masked_load_sext_i8i16(ptr %ap, <vscale x 16 x i8> %b)
; CHECK-LABEL: masked_load_sext_i8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: punpklo p0.h, p1.b
; CHECK-NEXT: ret
%p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 10)
%cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
@@ -23,9 +23,9 @@ define <vscale x 8 x i1> @masked_load_sext_i8i16_ptrue_vl(ptr %ap, <vscale x 16
; CHECK-LABEL: masked_load_sext_i8i16_ptrue_vl:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: punpklo p0.h, p1.b
; CHECK-NEXT: ptrue p1.h, vl32
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
@@ -41,9 +41,9 @@ define <vscale x 8 x i1> @masked_load_sext_i8i16_ptrue_vl(ptr %ap, <vscale x 16
define <vscale x 8 x i1> @masked_load_sext_i8i16_parg(ptr %ap, <vscale x 16 x i8> %b, <vscale x 16 x i1> %p0) #0 {
; CHECK-LABEL: masked_load_sext_i8i16_parg:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpeq p2.b, p0/z, z0.b, #0
; CHECK-NEXT: ptrue p1.h, vl32
-; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p2.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
@@ -58,8 +58,8 @@ define <vscale x 4 x i1> @masked_load_sext_i8i32(ptr %ap, <vscale x 16 x i8> %b)
; CHECK-LABEL: masked_load_sext_i8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: punpklo p0.h, p1.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ret
%p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 10)
@@ -76,9 +76,9 @@ define <vscale x 4 x i1> @masked_load_sext_i8i32_ptrue_vl(ptr %ap, <vscale x 16
; CHECK-LABEL: masked_load_sext_i8i32_ptrue_vl:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: punpklo p0.h, p1.b
; CHECK-NEXT: ptrue p1.s, vl32
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
@@ -95,9 +95,9 @@ define <vscale x 4 x i1> @masked_load_sext_i8i32_ptrue_vl(ptr %ap, <vscale x 16
define <vscale x 4 x i1> @masked_load_sext_i8i32_parg(ptr %ap, <vscale x 16 x i8> %b, <vscale x 16 x i1> %p0) #0 {
; CHECK-LABEL: masked_load_sext_i8i32_parg:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpeq p2.b, p0/z, z0.b, #0
; CHECK-NEXT: ptrue p1.s, vl32
-; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p2.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
@@ -113,8 +113,8 @@ define <vscale x 2 x i1> @masked_load_sext_i8i64(ptr %ap, <vscale x 16 x i8> %b)
; CHECK-LABEL: masked_load_sext_i8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: punpklo p0.h, p1.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ret
@@ -132,9 +132,9 @@ define <vscale x 2 x i1> @masked_load_sext_i8i64_ptrue_vl(ptr %ap, <vscale x 16
; CHECK-LABEL: masked_load_sext_i8i64_ptrue_vl:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: punpklo p0.h, p1.b
; CHECK-NEXT: ptrue p1.d, vl32
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
@@ -152,9 +152,9 @@ define <vscale x 2 x i1> @masked_load_sext_i8i64_ptrue_vl(ptr %ap, <vscale x 16
define <vscale x 2 x i1> @masked_load_sext_i8i64_parg(ptr %ap, <vscale x 16 x i8> %b, <vscale x 16 x i1> %p0) #0 {
; CHECK-LABEL: masked_load_sext_i8i64_parg:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpeq p2.b, p0/z, z0.b, #0
; CHECK-NEXT: ptrue p1.d, vl32
-; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p2.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
@@ -172,9 +172,9 @@ define <vscale x 8 x i1> @masked_load_sext_i8i16_ptrue_all(ptr %ap, <vscale x 16
; CHECK-LABEL: masked_load_sext_i8i16_ptrue_all:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: punpklo p0.h, p1.b
; CHECK-NEXT: ptrue p1.h, vl32
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
@@ -191,9 +191,9 @@ define <vscale x 4 x i1> @masked_load_sext_i8i32_ptrue_all(ptr %ap, <vscale x 16
; CHECK-LABEL: masked_load_sext_i8i32_ptrue_all:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: punpklo p0.h, p1.b
; CHECK-NEXT: ptrue p1.s, vl32
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
@@ -211,8 +211,8 @@ define <vscale x 2 x i1> @masked_load_sext_i8i64_ptrue_all(ptr %ap, <vscale x 16
; CHECK-LABEL: masked_load_sext_i8i64_ptrue_all:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: punpklo p0.h, p1.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-regalloc-hint-match-cmp.mir b/llvm/test/CodeGen/AArch64/sve-regalloc-hint-match-cmp.mir
new file mode 100644
index 0000000000000..86e9337a1b394
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-regalloc-hint-match-cmp.mir
@@ -0,0 +1,64 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -start-before=greedy -stop-after=virtregrewriter -o - %s | FileCheck %s
+--- |
+ target triple = "aarch64"
+
+ define aarch64_sve_vector_pcs void @regalloc_hint_cmp_match_dont_introduce_csr_spill() #0 { unreachable }
+ define aarch64_sve_vector_pcs void @regalloc_hint_cmp_match_reuse_p4() #0 { unreachable }
+
+ attributes #0 = { "target-features"="+sve" }
+...
+
+# When suggesting a regalloc hint to choose a distinct destination register for %0,
+# avoid using a callee-save register as that will require a spill/fill.
+---
+name: regalloc_hint_cmp_match_dont_introduce_csr_spill
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $z0, $z1, $p0, $p1, $p2, $p3
+
+ ; CHECK-LABEL: name: regalloc_hint_cmp_match_dont_introduce_csr_spill
+ ; CHECK: liveins: $p0, $p1, $p2, $p3, $x0, $z0, $z1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $p0 = CMPEQ_PPzZZ_B $p0, $z0, $z1, implicit-def dead $nzcv
+ ; CHECK-NEXT: FAKE_USE $p1
+ ; CHECK-NEXT: FAKE_USE $p2
+ ; CHECK-NEXT: FAKE_USE $p3
+ ; CHECK-NEXT: ST1B_IMM $z0, killed renamable $p0, $x0, 0
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:ppr_3b = CMPEQ_PPzZZ_B $p0, $z0, $z1, implicit-def dead $nzcv
+ FAKE_USE $p1
+ FAKE_USE $p2
+ FAKE_USE $p3
+ ST1B_IMM $z0, %0, $x0, 0
+ RET_ReallyLR
+...
+
+# For this test, the register allocator can safely reuse $p4 because it was already clobbered
+# for other purposes.
+---
+name: regalloc_hint_cmp_match_reuse_p4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $z0, $z1, $p0, $p1, $p2, $p3
+
+ ; CHECK-LABEL: name: regalloc_hint_cmp_match_reuse_p4
+ ; CHECK: liveins: $p0, $p1, $p2, $p3, $x0, $z0, $z1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $p4 = IMPLICIT_DEF
+ ; CHECK-NEXT: renamable $p4 = CMPEQ_PPzZZ_B $p0, $z0, $z1, implicit-def dead $nzcv
+ ; CHECK-NEXT: FAKE_USE $p1
+ ; CHECK-NEXT: FAKE_USE $p2
+ ; CHECK-NEXT: FAKE_USE $p3
+ ; CHECK-NEXT: ST1B_IMM $z0, killed renamable $p4, $x0, 0
+ ; CHECK-NEXT: RET_ReallyLR
+ $p4 = IMPLICIT_DEF
+ %0:ppr_3b = CMPEQ_PPzZZ_B $p0, $z0, $z1, implicit-def dead $nzcv
+ FAKE_USE $p1
+ FAKE_USE $p2
+ FAKE_USE $p3
+ ST1B_IMM $z0, %0, $x0, 0
+ RET_ReallyLR
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-scmp.ll b/llvm/test/CodeGen/AArch64/sve-scmp.ll
index 2083ddd8c3837..5ec6d1357db1c 100644
--- a/llvm/test/CodeGen/AArch64/sve-scmp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-scmp.ll
@@ -8,9 +8,9 @@ define <vscale x 8 x i8> @s_nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
; CHECK-NEXT: cmpgt p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z1.h, z0.h
+; CHECK-NEXT: cmpgt p2.h, p0/z, z1.h, z0.h
; CHECK-NEXT: mov z0.h, p1/z, #1 // =0x1
-; CHECK-NEXT: mov z0.h, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.h, p2/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
entry:
%c = call <vscale x 8 x i8> @llvm.scmp(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b)
@@ -22,9 +22,9 @@ define <vscale x 16 x i8> @s_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cmpgt p1.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: cmpgt p0.b, p0/z, z1.b, z0.b
+; CHECK-NEXT: cmpgt p2.b, p0/z, z1.b, z0.b
; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1
-; CHECK-NEXT: mov z0.b, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.b, p2/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
entry:
%c = call <vscale x 16 x i8> @llvm.scmp(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -38,9 +38,9 @@ define <vscale x 4 x i16> @s_nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %
; CHECK-NEXT: sxth z0.s, p0/m, z0.s
; CHECK-NEXT: sxth z1.s, p0/m, z1.s
; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z1.s, z0.s
+; CHECK-NEXT: cmpgt p2.s, p0/z, z1.s, z0.s
; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1
-; CHECK-NEXT: mov z0.s, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.s, p2/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
entry:
%c = call <vscale x 4 x i16> @llvm.scmp(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b)
@@ -52,9 +52,9 @@ define <vscale x 8 x i16> @s_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: cmpgt p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z1.h, z0.h
+; CHECK-NEXT: cmpgt p2.h, p0/z, z1.h, z0.h
; CHECK-NEXT: mov z0.h, p1/z, #1 // =0x1
-; CHECK-NEXT: mov z0.h, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.h, p2/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
entry:
%c = call <vscale x 8 x i16> @llvm.scmp(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -86,9 +86,9 @@ define <vscale x 2 x i32> @s_nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %
; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
; CHECK-NEXT: sxtw z1.d, p0/m, z1.d
; CHECK-NEXT: cmpgt p1.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z1.d, z0.d
+; CHECK-NEXT: cmpgt p2.d, p0/z, z1.d, z0.d
; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1
-; CHECK-NEXT: mov z0.d, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.d, p2/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
entry:
%c = call <vscale x 2 x i32> @llvm.scmp(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b)
@@ -100,9 +100,9 @@ define <vscale x 4 x i32> @s_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z1.s, z0.s
+; CHECK-NEXT: cmpgt p2.s, p0/z, z1.s, z0.s
; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1
-; CHECK-NEXT: mov z0.s, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.s, p2/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
entry:
%c = call <vscale x 4 x i32> @llvm.scmp(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -132,9 +132,9 @@ define <vscale x 2 x i64> @s_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cmpgt p1.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: cmpgt p0.d, p0/z, z1.d, z0.d
+; CHECK-NEXT: cmpgt p2.d, p0/z, z1.d, z0.d
; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1
-; CHECK-NEXT: mov z0.d, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.d, p2/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
entry:
%c = call <vscale x 2 x i64> @llvm.scmp(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
diff --git a/llvm/test/CodeGen/AArch64/sve-select.ll b/llvm/test/CodeGen/AArch64/sve-select.ll
index e0ea173cd28e4..513613b741cd4 100644
--- a/llvm/test/CodeGen/AArch64/sve-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-select.ll
@@ -690,8 +690,8 @@ define <vscale x 4 x float> @select_f32_invert_fsub(<vscale x 4 x float> %a, <vs
; CHECK-LABEL: select_f32_invert_fsub:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: fsub z0.s, p1/m, z0.s, z1.s
; CHECK-NEXT: ret
%p = icmp eq <vscale x 4 x i32> %c, zeroinitializer
%fsub = fsub <vscale x 4 x float> %a, %b
diff --git a/llvm/test/CodeGen/AArch64/sve-setcc.ll b/llvm/test/CodeGen/AArch64/sve-setcc.ll
index d4c17c1bd838c..30e2a77f32120 100644
--- a/llvm/test/CodeGen/AArch64/sve-setcc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-setcc.ll
@@ -107,8 +107,8 @@ define <vscale x 16 x i1> @sve_cmpne_setcc_equal_pred(<vscale x 16 x i8> %vec, <
define <vscale x 16 x i1> @sve_cmpne_setcc_different_pred(<vscale x 16 x i8> %vec, <vscale x 16 x i1> %pg1, <vscale x 16 x i1> %pg2) {
; CHECK-LABEL: sve_cmpne_setcc_different_pred:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT: cmpne p2.b, p0/z, z0.b, #0
+; CHECK-NEXT: and p0.b, p2/z, p2.b, p1.b
; CHECK-NEXT: ret
%cmp1 = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> %pg1, <vscale x 16 x i8> %vec, <vscale x 16 x i8> zeroinitializer)
%cmp1.sext = sext <vscale x 16 x i1> %cmp1 to <vscale x 16 x i8>
diff --git a/llvm/test/CodeGen/AArch64/sve-smulo-sdnode.ll b/llvm/test/CodeGen/AArch64/sve-smulo-sdnode.ll
index a2429d4975b7a..e503f10fd2834 100644
--- a/llvm/test/CodeGen/AArch64/sve-smulo-sdnode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-smulo-sdnode.ll
@@ -12,8 +12,8 @@ define <vscale x 2 x i8> @smulo_nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: sxtb z1.d, p0/m, z0.d
-; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 2 x i8>, <vscale x 2 x i1> } @llvm.smul.with.overflow.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y)
%b = extractvalue { <vscale x 2 x i8>, <vscale x 2 x i1> } %a, 0
@@ -33,8 +33,8 @@ define <vscale x 4 x i8> @smulo_nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: sxtb z1.s, p0/m, z0.s
-; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, z0.s
-; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, z0.s
+; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 4 x i8>, <vscale x 4 x i1> } @llvm.smul.with.overflow.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %y)
%b = extractvalue { <vscale x 4 x i8>, <vscale x 4 x i1> } %a, 0
@@ -54,8 +54,8 @@ define <vscale x 8 x i8> @smulo_nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: sxtb z1.h, p0/m, z0.h
-; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.h, p0/z, z1.h, z0.h
+; CHECK-NEXT: mov z0.h, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 8 x i8>, <vscale x 8 x i1> } @llvm.smul.with.overflow.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y)
%b = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i1> } %a, 0
@@ -74,8 +74,8 @@ define <vscale x 16 x i8> @smulo_nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-NEXT: mul z2.b, p0/m, z2.b, z1.b
; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: asr z1.b, z2.b, #7
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: mov z2.b, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: mov z2.b, p1/m, #0 // =0x0
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
%a = call { <vscale x 16 x i8>, <vscale x 16 x i1> } @llvm.smul.with.overflow.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y)
@@ -100,8 +100,8 @@ define <vscale x 32 x i8> @smulo_nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i
; CHECK-NEXT: asr z2.b, z4.b, #7
; CHECK-NEXT: asr z3.b, z5.b, #7
; CHECK-NEXT: cmpne p1.b, p0/z, z1.b, z2.b
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z3.b
-; CHECK-NEXT: mov z5.b, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p2.b, p0/z, z0.b, z3.b
+; CHECK-NEXT: mov z5.b, p2/m, #0 // =0x0
; CHECK-NEXT: mov z4.b, p1/m, #0 // =0x0
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: mov z1.d, z4.d
@@ -166,8 +166,8 @@ define <vscale x 2 x i16> @smulo_nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i1
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: sxth z1.d, p0/m, z0.d
-; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 2 x i16>, <vscale x 2 x i1> } @llvm.smul.with.overflow.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %y)
%b = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i1> } %a, 0
@@ -187,8 +187,8 @@ define <vscale x 4 x i16> @smulo_nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i1
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: sxth z1.s, p0/m, z0.s
-; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, z0.s
-; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, z0.s
+; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 4 x i16>, <vscale x 4 x i1> } @llvm.smul.with.overflow.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %y)
%b = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i1> } %a, 0
@@ -207,8 +207,8 @@ define <vscale x 8 x i16> @smulo_nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-NEXT: mul z2.h, p0/m, z2.h, z1.h
; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: asr z1.h, z2.h, #15
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z2.h, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: mov z2.h, p1/m, #0 // =0x0
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
%a = call { <vscale x 8 x i16>, <vscale x 8 x i1> } @llvm.smul.with.overflow.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y)
@@ -233,8 +233,8 @@ define <vscale x 16 x i16> @smulo_nxv16i16(<vscale x 16 x i16> %x, <vscale x 16
; CHECK-NEXT: asr z2.h, z4.h, #15
; CHECK-NEXT: asr z3.h, z5.h, #15
; CHECK-NEXT: cmpne p1.h, p0/z, z1.h, z2.h
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, z3.h
-; CHECK-NEXT: mov z5.h, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p2.h, p0/z, z0.h, z3.h
+; CHECK-NEXT: mov z5.h, p2/m, #0 // =0x0
; CHECK-NEXT: mov z4.h, p1/m, #0 // =0x0
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: mov z1.d, z4.d
@@ -299,8 +299,8 @@ define <vscale x 2 x i32> @smulo_nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i3
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: sxtw z1.d, p0/m, z0.d
-; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 2 x i32>, <vscale x 2 x i1> } @llvm.smul.with.overflow.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y)
%b = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i1> } %a, 0
@@ -319,8 +319,8 @@ define <vscale x 4 x i32> @smulo_nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-NEXT: mul z2.s, p0/m, z2.s, z1.s
; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: asr z1.s, z2.s, #31
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: mov z2.s, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
%a = call { <vscale x 4 x i32>, <vscale x 4 x i1> } @llvm.smul.with.overflow.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y)
@@ -345,8 +345,8 @@ define <vscale x 8 x i32> @smulo_nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i3
; CHECK-NEXT: asr z2.s, z4.s, #31
; CHECK-NEXT: asr z3.s, z5.s, #31
; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, z2.s
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z3.s
-; CHECK-NEXT: mov z5.s, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p2.s, p0/z, z0.s, z3.s
+; CHECK-NEXT: mov z5.s, p2/m, #0 // =0x0
; CHECK-NEXT: mov z4.s, p1/m, #0 // =0x0
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: mov z1.d, z4.d
@@ -410,8 +410,8 @@ define <vscale x 2 x i64> @smulo_nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d
; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: asr z1.d, z2.d, #63
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
%a = call { <vscale x 2 x i64>, <vscale x 2 x i1> } @llvm.smul.with.overflow.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y)
@@ -436,8 +436,8 @@ define <vscale x 4 x i64> @smulo_nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i6
; CHECK-NEXT: asr z2.d, z4.d, #63
; CHECK-NEXT: asr z3.d, z5.d, #63
; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, z2.d
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, z3.d
-; CHECK-NEXT: mov z5.d, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p2.d, p0/z, z0.d, z3.d
+; CHECK-NEXT: mov z5.d, p2/m, #0 // =0x0
; CHECK-NEXT: mov z4.d, p1/m, #0 // =0x0
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: mov z1.d, z4.d
diff --git a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll
index 4ed59bc67db0c..6116473d9588e 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll
@@ -9,8 +9,8 @@ define <vscale x 8 x i8> @promote_insert_8i8(<vscale x 8 x i8> %a, i8 %elt, i64
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w1
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
-; CHECK-NEXT: mov z0.h, p0/m, w0
+; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
+; CHECK-NEXT: mov z0.h, p1/m, w0
; CHECK-NEXT: ret
%ins = insertelement <vscale x 8 x i8> %a, i8 %elt, i64 %idx
ret <vscale x 8 x i8> %ins
@@ -101,8 +101,8 @@ define <vscale x 4 x i16> @promote_insert_4i16(<vscale x 4 x i16> %a, i16 %elt)
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
-; CHECK-NEXT: mov z0.s, p0/m, w0
+; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: mov z0.s, p1/m, w0
; CHECK-NEXT: ret
%ins = insertelement <vscale x 4 x i16> %a, i16 %elt, i64 5
ret <vscale x 4 x i16> %ins
@@ -118,8 +118,8 @@ define <vscale x 32 x i8> @split_insert_32i8(<vscale x 32 x i8> %a, i8 %elt) {
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z3.b, w8
-; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z3.b
-; CHECK-NEXT: mov z0.b, p0/m, w0
+; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
+; CHECK-NEXT: mov z0.b, p1/m, w0
; CHECK-NEXT: ret
%ins = insertelement <vscale x 32 x i8> %a, i8 %elt, i64 3
ret <vscale x 32 x i8> %ins
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
index 11446df7dbe4f..957a0a4e8e31f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
@@ -11,8 +11,8 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, i1 %mask) {
; CHECK-NEXT: mov z2.h, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z2.h, z2.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: cmpne p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v2f16:
@@ -50,8 +50,8 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, i1 %mask) {
; CHECK-NEXT: mov z2.h, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z2.h, z2.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: cmpne p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v4f16:
@@ -89,8 +89,8 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, i1 %mask) {
; CHECK-NEXT: mov z2.h, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z2.h, z2.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: cmpne p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v8f16:
@@ -143,13 +143,13 @@ define void @select_v16f16(ptr %a, ptr %b, i1 %mask) {
; CHECK-NEXT: mov z0.h, w2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z0.h, z0.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x0, #16]
; CHECK-NEXT: ldr q2, [x1]
; CHECK-NEXT: ldr q3, [x1, #16]
-; CHECK-NEXT: sel z0.h, p0, z0.h, z2.h
-; CHECK-NEXT: sel z1.h, p0, z1.h, z3.h
+; CHECK-NEXT: sel z0.h, p1, z0.h, z2.h
+; CHECK-NEXT: sel z1.h, p1, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
@@ -246,8 +246,8 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, i1 %mask) {
; CHECK-NEXT: and w8, w0, #0x1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v2f32:
@@ -275,8 +275,8 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, i1 %mask) {
; CHECK-NEXT: and w8, w0, #0x1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v4f32:
@@ -309,13 +309,13 @@ define void @select_v8f32(ptr %a, ptr %b, i1 %mask) {
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z0.s, w8
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x0, #16]
; CHECK-NEXT: ldr q2, [x1]
; CHECK-NEXT: ldr q3, [x1, #16]
-; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s
-; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s
+; CHECK-NEXT: sel z0.s, p1, z0.s, z2.s
+; CHECK-NEXT: sel z1.s, p1, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
@@ -391,8 +391,8 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask
; CHECK-NEXT: and x8, x0, #0x1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v2f64:
@@ -419,13 +419,13 @@ define void @select_v4f64(ptr %a, ptr %b, i1 %mask) {
; CHECK-NEXT: and x8, x2, #0x1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z0.d, x8
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x0, #16]
; CHECK-NEXT: ldr q2, [x1]
; CHECK-NEXT: ldr q3, [x1, #16]
-; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d
-; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d
+; CHECK-NEXT: sel z0.d, p1, z0.d, z2.d
+; CHECK-NEXT: sel z1.d, p1, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
index 1030e96939852..e5cd5321dbb54 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
@@ -14,8 +14,8 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x i1> %mask
; CHECK-NEXT: lsl z2.h, z2.h, #15
; CHECK-NEXT: asr z2.h, z2.h, #15
; CHECK-NEXT: and z2.h, z2.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: cmpne p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v2f16:
@@ -53,8 +53,8 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: asr z2.h, z2.h, #15
; CHECK-NEXT: and z2.h, z2.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: cmpne p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v4f16:
@@ -106,8 +106,8 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask
; CHECK-NEXT: lsl z2.h, z2.h, #15
; CHECK-NEXT: asr z2.h, z2.h, #15
; CHECK-NEXT: and z2.h, z2.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: cmpne p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v8f16:
@@ -330,8 +330,8 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %m
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: asr z2.s, z2.s, #31
; CHECK-NEXT: and z2.s, z2.s, #0x1
-; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v2f32:
@@ -365,8 +365,8 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %m
; CHECK-NEXT: lsl z2.s, z2.s, #31
; CHECK-NEXT: asr z2.s, z2.s, #31
; CHECK-NEXT: and z2.s, z2.s, #0x1
-; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v4f32:
@@ -496,8 +496,8 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1>
; CHECK-NEXT: lsl z2.d, z2.d, #63
; CHECK-NEXT: asr z2.d, z2.d, #63
; CHECK-NEXT: and z2.d, z2.d, #0x1
-; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v2f64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
index 5f21c80c2fdd0..256932fd89099 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
@@ -18,8 +18,8 @@ define <4 x i8> @insertelement_v4i8(<4 x i8> %op1) {
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: mov w8, #5 // =0x5
-; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
-; CHECK-NEXT: mov z0.h, p0/m, w8
+; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
+; CHECK-NEXT: mov z0.h, p1/m, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -54,8 +54,8 @@ define <8 x i8> @insertelement_v8i8(<8 x i8> %op1) {
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: mov w8, #5 // =0x5
-; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
-; CHECK-NEXT: mov z0.b, p0/m, w8
+; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
+; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -92,8 +92,8 @@ define <16 x i8> @insertelement_v16i8(<16 x i8> %op1) {
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: mov w8, #5 // =0x5
-; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
-; CHECK-NEXT: mov z0.b, p0/m, w8
+; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
+; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -132,8 +132,8 @@ define <32 x i8> @insertelement_v32i8(<32 x i8> %op1) {
; CHECK-NEXT: mov z3.b, w8
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: mov w8, #5 // =0x5
-; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z3.b
-; CHECK-NEXT: mov z1.b, p0/m, w8
+; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
+; CHECK-NEXT: mov z1.b, p1/m, w8
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
;
@@ -173,8 +173,8 @@ define <2 x i16> @insertelement_v2i16(<2 x i16> %op1) {
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: mov w8, #5 // =0x5
-; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
-; CHECK-NEXT: mov z0.s, p0/m, w8
+; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: mov z0.s, p1/m, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -206,8 +206,8 @@ define <4 x i16> @insertelement_v4i16(<4 x i16> %op1) {
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: mov w8, #5 // =0x5
-; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
-; CHECK-NEXT: mov z0.h, p0/m, w8
+; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
+; CHECK-NEXT: mov z0.h, p1/m, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -242,8 +242,8 @@ define <8 x i16> @insertelement_v8i16(<8 x i16> %op1) {
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: mov w8, #5 // =0x5
-; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
-; CHECK-NEXT: mov z0.h, p0/m, w8
+; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
+; CHECK-NEXT: mov z0.h, p1/m, w8
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -280,8 +280,8 @@ define <16 x i16> @insertelement_v16i16(<16 x i16> %op1) {
; CHECK-NEXT: mov z3.h, w8
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: mov w8, #5 // =0x5
-; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h
-; CHECK-NEXT: mov z1.h, p0/m, w8
+; CHECK-NEXT: cmpeq p1.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: mov z1.h, p1/m, w8
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
;
@@ -319,8 +319,8 @@ define <2 x i32> @insertelement_v2i32(<2 x i32> %op1) {
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: mov w8, #5 // =0x5
-; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
-; CHECK-NEXT: mov z0.s, p0/m, w8
+; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: mov z0.s, p1/m, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -352,8 +352,8 @@ define <4 x i32> @insertelement_v4i32(<4 x i32> %op1) {
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: mov w8, #5 // =0x5
-; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
-; CHECK-NEXT: mov z0.s, p0/m, w8
+; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: mov z0.s, p1/m, w8
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -386,9 +386,9 @@ define <8 x i32> @insertelement_v8i32(ptr %a) {
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: mov w8, #5 // =0x5
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: mov z1.s, p0/m, w8
+; CHECK-NEXT: mov z1.s, p1/m, w8
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
;
@@ -442,8 +442,8 @@ define <2 x i64> @insertelement_v2i64(<2 x i64> %op1) {
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: mov w8, #5 // =0x5
-; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
-; CHECK-NEXT: mov z0.d, p0/m, x8
+; CHECK-NEXT: cmpeq p1.d, p0/z, z1.d, z2.d
+; CHECK-NEXT: mov z0.d, p1/m, x8
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -474,9 +474,9 @@ define <4 x i64> @insertelement_v4i64(ptr %a) {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: mov w8, #5 // =0x5
-; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: mov z1.d, p0/m, x8
+; CHECK-NEXT: mov z1.d, p1/m, x8
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
;
@@ -534,9 +534,9 @@ define <4 x half> @insertelement_v4f16(<4 x half> %op1) {
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: fmov h1, #5.00000000
-; CHECK-NEXT: mov z0.h, p0/m, h1
+; CHECK-NEXT: mov z0.h, p1/m, h1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -571,9 +571,9 @@ define <8 x half> @insertelement_v8f16(<8 x half> %op1) {
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: fmov h1, #5.00000000
-; CHECK-NEXT: mov z0.h, p0/m, h1
+; CHECK-NEXT: mov z0.h, p1/m, h1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -610,9 +610,9 @@ define <16 x half> @insertelement_v16f16(ptr %a) {
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: fmov h2, #5.00000000
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: mov z1.h, p0/m, h2
+; CHECK-NEXT: mov z1.h, p1/m, h2
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
;
@@ -651,9 +651,9 @@ define <2 x float> @insertelement_v2f32(<2 x float> %op1) {
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: fmov s1, #5.00000000
-; CHECK-NEXT: mov z0.s, p0/m, s1
+; CHECK-NEXT: mov z0.s, p1/m, s1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -684,9 +684,9 @@ define <4 x float> @insertelement_v4f32(<4 x float> %op1) {
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: fmov s1, #5.00000000
-; CHECK-NEXT: mov z0.s, p0/m, s1
+; CHECK-NEXT: mov z0.s, p1/m, s1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -719,9 +719,9 @@ define <8 x float> @insertelement_v8f32(ptr %a) {
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: fmov s2, #5.00000000
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: mov z1.s, p0/m, s2
+; CHECK-NEXT: mov z1.s, p1/m, s2
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
;
@@ -774,9 +774,9 @@ define <2 x double> @insertelement_v2f64(<2 x double> %op1) {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
+; CHECK-NEXT: cmpeq p1.d, p0/z, z1.d, z2.d
; CHECK-NEXT: fmov d1, #5.00000000
-; CHECK-NEXT: mov z0.d, p0/m, d1
+; CHECK-NEXT: mov z0.d, p1/m, d1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -807,9 +807,9 @@ define <4 x double> @insertelement_v4f64(ptr %a) {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: fmov d2, #5.00000000
-; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: mov z1.d, p0/m, d2
+; CHECK-NEXT: mov z1.d, p1/m, d2
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
index d24f4144d4983..72c4fe528db32 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
@@ -13,8 +13,8 @@ define <8 x i8> @icmp_eq_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-LABEL: icmp_eq_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: icmp_eq_v8i8:
@@ -74,8 +74,8 @@ define <16 x i8> @icmp_eq_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-LABEL: icmp_eq_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: icmp_eq_v16i8:
@@ -177,9 +177,9 @@ define void @icmp_eq_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z0.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z3.b
+; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.b, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
@@ -367,8 +367,8 @@ define <4 x i16> @icmp_eq_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-LABEL: icmp_eq_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: icmp_eq_v4i16:
@@ -408,8 +408,8 @@ define <8 x i16> @icmp_eq_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-LABEL: icmp_eq_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: icmp_eq_v8i16:
@@ -471,9 +471,9 @@ define void @icmp_eq_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: cmpeq p2.h, p0/z, z2.h, z3.h
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
@@ -581,8 +581,8 @@ define <2 x i32> @icmp_eq_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-LABEL: icmp_eq_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: icmp_eq_v2i32:
@@ -610,8 +610,8 @@ define <4 x i32> @icmp_eq_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-LABEL: icmp_eq_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: icmp_eq_v4i32:
@@ -649,9 +649,9 @@ define void @icmp_eq_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z0.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s
+; CHECK-NEXT: cmpeq p2.s, p0/z, z2.s, z3.s
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
@@ -711,8 +711,8 @@ define <1 x i64> @icmp_eq_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-LABEL: icmp_eq_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl1
-; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: icmp_eq_v1i64:
@@ -735,8 +735,8 @@ define <2 x i64> @icmp_eq_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-LABEL: icmp_eq_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: icmp_eq_v2i64:
@@ -766,9 +766,9 @@ define void @icmp_eq_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: cmpeq p1.d, p0/z, z1.d, z0.d
-; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
+; CHECK-NEXT: cmpeq p2.d, p0/z, z2.d, z3.d
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
@@ -819,9 +819,9 @@ define void @icmp_ne_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: cmpne p1.b, p0/z, z1.b, z0.b
-; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, z3.b
+; CHECK-NEXT: cmpne p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.b, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
@@ -1015,8 +1015,8 @@ define void @icmp_sge_v8i16(ptr %a, ptr %b) {
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: cmpge p0.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmpge p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
;
@@ -1089,9 +1089,9 @@ define void @icmp_sgt_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: cmpgt p1.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: cmpgt p2.h, p0/z, z2.h, z3.h
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
@@ -1205,8 +1205,8 @@ define void @icmp_sle_v4i32(ptr %a, ptr %b) {
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: cmpge p0.s, p0/z, z1.s, z0.s
-; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmpge p1.s, p0/z, z1.s, z0.s
+; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
;
@@ -1255,9 +1255,9 @@ define void @icmp_slt_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z3.s, z2.s
+; CHECK-NEXT: cmpgt p2.s, p0/z, z3.s, z2.s
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
@@ -1323,8 +1323,8 @@ define void @icmp_uge_v2i64(ptr %a, ptr %b) {
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: cmphs p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmphs p1.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
;
@@ -1364,8 +1364,8 @@ define void @icmp_ugt_v2i64(ptr %a, ptr %b) {
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: cmphi p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmphi p1.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
;
@@ -1405,8 +1405,8 @@ define void @icmp_ule_v2i64(ptr %a, ptr %b) {
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: cmphs p0.d, p0/z, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmphs p1.d, p0/z, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
;
@@ -1446,8 +1446,8 @@ define void @icmp_ult_v2i64(ptr %a, ptr %b) {
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: cmphi p0.d, p0/z, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmphi p1.d, p0/z, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll
index 2e8d91e7083bf..fd5a618b0d151 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll
@@ -843,9 +843,9 @@ define void @icmp_eq_v32i8(ptr %a) {
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #7
-; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, #7
+; CHECK-NEXT: cmpeq p2.b, p0/z, z1.b, #7
; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.b, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
@@ -999,9 +999,9 @@ define void @icmp_sge_v16i16(ptr %a) {
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: cmpge p1.h, p0/z, z0.h, #15
-; CHECK-NEXT: cmpge p0.h, p0/z, z1.h, #15
+; CHECK-NEXT: cmpge p2.h, p0/z, z1.h, #15
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
@@ -1091,9 +1091,9 @@ define void @icmp_sgt_v8i32(ptr %a) {
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, #-8
-; CHECK-NEXT: cmpgt p0.s, p0/z, z1.s, #-8
+; CHECK-NEXT: cmpgt p2.s, p0/z, z1.s, #-8
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
@@ -1147,9 +1147,9 @@ define void @icmp_ult_v4i64(ptr %a) {
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: cmplo p1.d, p0/z, z0.d, #63
-; CHECK-NEXT: cmplo p0.d, p0/z, z1.d, #63
+; CHECK-NEXT: cmplo p2.d, p0/z, z1.d, #63
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
index 1c637bbb31889..299e890eabbf2 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
@@ -11,8 +11,8 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, i1 %mask) {
; CHECK-NEXT: mov z2.h, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z2.h, z2.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: cmpne p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v4i8:
@@ -49,8 +49,8 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, i1 %mask) {
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.b, w0
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: cmpne p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v8i8:
@@ -103,8 +103,8 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, i1 %mask) {
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.b, w0
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: cmpne p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v16i8:
@@ -188,13 +188,13 @@ define void @select_v32i8(ptr %a, ptr %b, i1 %mask) {
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, w2
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x0, #16]
; CHECK-NEXT: ldr q2, [x1]
; CHECK-NEXT: ldr q3, [x1, #16]
-; CHECK-NEXT: sel z0.b, p0, z0.b, z2.b
-; CHECK-NEXT: sel z1.b, p0, z1.b, z3.b
+; CHECK-NEXT: sel z0.b, p1, z0.b, z2.b
+; CHECK-NEXT: sel z1.b, p1, z1.b, z3.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
@@ -355,8 +355,8 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, i1 %mask) {
; CHECK-NEXT: and w8, w0, #0x1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v2i16:
@@ -384,8 +384,8 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, i1 %mask) {
; CHECK-NEXT: mov z2.h, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z2.h, z2.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: cmpne p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v4i16:
@@ -423,8 +423,8 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, i1 %mask) {
; CHECK-NEXT: mov z2.h, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z2.h, z2.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: cmpne p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v8i16:
@@ -477,13 +477,13 @@ define void @select_v16i16(ptr %a, ptr %b, i1 %mask) {
; CHECK-NEXT: mov z0.h, w2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z0.h, z0.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x0, #16]
; CHECK-NEXT: ldr q2, [x1]
; CHECK-NEXT: ldr q3, [x1, #16]
-; CHECK-NEXT: sel z0.h, p0, z0.h, z2.h
-; CHECK-NEXT: sel z1.h, p0, z1.h, z3.h
+; CHECK-NEXT: sel z0.h, p1, z0.h, z2.h
+; CHECK-NEXT: sel z1.h, p1, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
@@ -580,8 +580,8 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) {
; CHECK-NEXT: and w8, w0, #0x1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v2i32:
@@ -609,8 +609,8 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) {
; CHECK-NEXT: and w8, w0, #0x1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v4i32:
@@ -643,13 +643,13 @@ define void @select_v8i32(ptr %a, ptr %b, i1 %mask) {
; CHECK-NEXT: and w8, w2, #0x1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z0.s, w8
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x0, #16]
; CHECK-NEXT: ldr q2, [x1]
; CHECK-NEXT: ldr q3, [x1, #16]
-; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s
-; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s
+; CHECK-NEXT: sel z0.s, p1, z0.s, z2.s
+; CHECK-NEXT: sel z1.s, p1, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
@@ -706,8 +706,8 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) {
; CHECK-NEXT: and x8, x0, #0x1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v1i64:
@@ -731,8 +731,8 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) {
; CHECK-NEXT: and x8, x0, #0x1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v2i64:
@@ -759,13 +759,13 @@ define void @select_v4i64(ptr %a, ptr %b, i1 %mask) {
; CHECK-NEXT: and x8, x2, #0x1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z0.d, x8
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x0, #16]
; CHECK-NEXT: ldr q2, [x1]
; CHECK-NEXT: ldr q3, [x1, #16]
-; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d
-; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d
+; CHECK-NEXT: sel z0.d, p1, z0.d, z2.d
+; CHECK-NEXT: sel z1.d, p1, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
index 509cac0248a0a..ff78732434dc8 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
@@ -12,8 +12,8 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, <4 x i1> %mask) {
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: asr z2.h, z2.h, #15
; CHECK-NEXT: and z2.h, z2.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: cmpne p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v4i8:
@@ -64,8 +64,8 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) {
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: asr z2.b, z2.b, #7
; CHECK-NEXT: and z2.b, z2.b, #0x1
-; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: cmpne p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v8i8:
@@ -144,8 +144,8 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask)
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: asr z2.b, z2.b, #7
; CHECK-NEXT: and z2.b, z2.b, #0x1
-; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: cmpne p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v16i8:
@@ -281,9 +281,9 @@ define void @select_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: ldp q2, q1, [x0]
; CHECK-NEXT: mov w8, #16 // =0x10
; CHECK-NEXT: cmpne p1.b, p0/z, z1.b, z0.b
-; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, z3.b
+; CHECK-NEXT: cmpne p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: st1b { z0.b }, p1, [x0, x8]
-; CHECK-NEXT: st1b { z3.b }, p0, [x0]
+; CHECK-NEXT: st1b { z3.b }, p2, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v32i8:
@@ -502,8 +502,8 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, <2 x i1> %mask) {
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: asr z2.s, z2.s, #31
; CHECK-NEXT: and z2.s, z2.s, #0x1
-; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v2i16:
@@ -539,8 +539,8 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) {
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: asr z2.h, z2.h, #15
; CHECK-NEXT: and z2.h, z2.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: cmpne p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v4i16:
@@ -592,8 +592,8 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) {
; CHECK-NEXT: lsl z2.h, z2.h, #15
; CHECK-NEXT: asr z2.h, z2.h, #15
; CHECK-NEXT: and z2.h, z2.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: cmpne p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v8i16:
@@ -673,9 +673,9 @@ define void @select_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: ldp q2, q1, [x0]
; CHECK-NEXT: mov x8, #8 // =0x8
; CHECK-NEXT: cmpne p1.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: cmpne p2.h, p0/z, z2.h, z3.h
; CHECK-NEXT: st1h { z0.h }, p1, [x0, x8, lsl #1]
-; CHECK-NEXT: st1h { z3.h }, p0, [x0]
+; CHECK-NEXT: st1h { z3.h }, p2, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v16i16:
@@ -788,8 +788,8 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) {
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: asr z2.s, z2.s, #31
; CHECK-NEXT: and z2.s, z2.s, #0x1
-; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v2i32:
@@ -826,8 +826,8 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) {
; CHECK-NEXT: lsl z2.s, z2.s, #31
; CHECK-NEXT: asr z2.s, z2.s, #31
; CHECK-NEXT: and z2.s, z2.s, #0x1
-; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v4i32:
@@ -879,9 +879,9 @@ define void @select_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: ldp q2, q1, [x0]
; CHECK-NEXT: mov x8, #4 // =0x4
; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, z0.s
-; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, z3.s
+; CHECK-NEXT: cmpne p2.s, p0/z, z2.s, z3.s
; CHECK-NEXT: st1w { z0.s }, p1, [x0, x8, lsl #2]
-; CHECK-NEXT: st1w { z3.s }, p0, [x0]
+; CHECK-NEXT: st1w { z3.s }, p2, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v8i32:
@@ -938,8 +938,8 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) {
; CHECK-NEXT: and x8, x0, #0x1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v1i64:
@@ -965,8 +965,8 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) {
; CHECK-NEXT: lsl z2.d, z2.d, #63
; CHECK-NEXT: asr z2.d, z2.d, #63
; CHECK-NEXT: and z2.d, z2.d, #0x1
-; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v2i64:
@@ -1003,9 +1003,9 @@ define void @select_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: ldp q2, q1, [x0]
; CHECK-NEXT: mov x8, #2 // =0x2
; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, z0.d
-; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, z3.d
+; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, z3.d
; CHECK-NEXT: st1d { z0.d }, p1, [x0, x8, lsl #3]
-; CHECK-NEXT: st1d { z3.d }, p0, [x0]
+; CHECK-NEXT: st1d { z3.d }, p2, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: select_v4i64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
index 45f2dbc98b953..593ec1113cc19 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
@@ -38,9 +38,9 @@ define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
; CHECK-NEXT: mov z1.d, z1.d[1]
; CHECK-NEXT: mov z3.d, x8
; CHECK-NEXT: fmov x8, d1
-; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
+; CHECK-NEXT: cmpeq p1.d, p0/z, z2.d, z3.d
; CHECK-NEXT: ldr x8, [x8]
-; CHECK-NEXT: mov z0.d, p0/m, x8
+; CHECK-NEXT: mov z0.d, p1/m, x8
; CHECK-NEXT: .LBB0_4: // %else2
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
index 64762e48d0863..ccbe8e105de4f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
@@ -16,8 +16,8 @@ define <4 x i8> @masked_load_v4i8(ptr %src, <4 x i1> %mask) {
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: lsl z0.h, z0.h, #15
; CHECK-NEXT: asr z0.h, z0.h, #15
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: ld1b { z0.h }, p1/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -112,8 +112,8 @@ define <8 x i8> @masked_load_v8i8(ptr %src, <8 x i1> %mask) {
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: lsl z0.b, z0.b, #7
; CHECK-NEXT: asr z0.b, z0.b, #7
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -307,8 +307,8 @@ define <16 x i8> @masked_load_v16i8(ptr %src, <16 x i1> %mask) {
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: lsl z0.b, z0.b, #7
; CHECK-NEXT: asr z0.b, z0.b, #7
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -743,8 +743,8 @@ define <32 x i8> @masked_load_v32i8(ptr %src, <32 x i1> %mask) {
; CHECK-NEXT: asr z0.b, z0.b, #7
; CHECK-NEXT: asr z1.b, z1.b, #7
; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0
-; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: cmpne p2.b, p0/z, z1.b, #0
+; CHECK-NEXT: ld1b { z0.b }, p2/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0, x8]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
@@ -1475,8 +1475,8 @@ define <2 x half> @masked_load_v2f16(ptr %src, <2 x i1> %mask) {
; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
; CHECK-NEXT: lsl z0.h, z0.h, #15
; CHECK-NEXT: asr z0.h, z0.h, #15
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -1524,8 +1524,8 @@ define <4 x half> @masked_load_v4f16(ptr %src, <4 x i1> %mask) {
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: lsl z0.h, z0.h, #15
; CHECK-NEXT: asr z0.h, z0.h, #15
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -1622,8 +1622,8 @@ define <8 x half> @masked_load_v8f16(ptr %src, <8 x i1> %mask) {
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: lsl z0.h, z0.h, #15
; CHECK-NEXT: asr z0.h, z0.h, #15
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -1826,10 +1826,10 @@ define <16 x half> @masked_load_v16f16(ptr %src, <16 x i1> %mask) {
; CHECK-NEXT: asr z0.h, z0.h, #15
; CHECK-NEXT: asr z1.h, z1.h, #15
; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
-; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0
+; CHECK-NEXT: cmpne p2.h, p0/z, z1.h, #0
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
+; CHECK-NEXT: ld1h { z1.h }, p2/z, [x0, x8, lsl #1]
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
;
@@ -2172,8 +2172,8 @@ define <2 x float> @masked_load_v2f32(ptr %src, <2 x i1> %mask) {
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: lsl z0.s, z0.s, #31
; CHECK-NEXT: asr z0.s, z0.s, #31
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -2224,8 +2224,8 @@ define <4 x float> @masked_load_v4f32(ptr %src, <4 x i1> %mask) {
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: lsl z0.s, z0.s, #31
; CHECK-NEXT: asr z0.s, z0.s, #31
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -2336,9 +2336,9 @@ define <8 x float> @masked_load_v8f32(ptr %src, <8 x i1> %mask) {
; CHECK-NEXT: asr z0.s, z0.s, #31
; CHECK-NEXT: asr z1.s, z1.s, #31
; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
-; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
+; CHECK-NEXT: cmpne p2.s, p0/z, z1.s, #0
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
-; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, x8, lsl #2]
+; CHECK-NEXT: ld1w { z1.s }, p2/z, [x0, x8, lsl #2]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
@@ -2522,8 +2522,8 @@ define <2 x double> @masked_load_v2f64(ptr %src, <2 x i1> %mask) {
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: lsl z0.d, z0.d, #63
; CHECK-NEXT: asr z0.d, z0.d, #63
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -2582,10 +2582,10 @@ define <4 x double> @masked_load_v4f64(ptr %src, <4 x i1> %mask) {
; CHECK-NEXT: asr z0.d, z0.d, #63
; CHECK-NEXT: asr z1.d, z1.d, #63
; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
-; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
+; CHECK-NEXT: cmpne p2.d, p0/z, z1.d, #0
; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, x8, lsl #3]
+; CHECK-NEXT: ld1d { z1.d }, p2/z, [x0, x8, lsl #3]
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
;
@@ -2680,8 +2680,8 @@ define <3 x i32> @masked_load_zext_v3i32(ptr %load_ptr, <3 x i1> %pm) {
; CHECK-NEXT: lsl z0.h, z0.h, #15
; CHECK-NEXT: asr z0.h, z0.h, #15
; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: ld1h { z0.s }, p1/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -2753,8 +2753,8 @@ define <3 x i32> @masked_load_sext_v3i32(ptr %load_ptr, <3 x i1> %pm) {
; CHECK-NEXT: lsl z0.h, z0.h, #15
; CHECK-NEXT: asr z0.h, z0.h, #15
; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: ld1sh { z0.s }, p1/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
index 8f4a696a28d62..0fa6fca1065c6 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
@@ -16,9 +16,9 @@ define void @masked_store_v4i8(ptr %dst, <4 x i1> %mask) {
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: lsl z0.h, z0.h, #15
; CHECK-NEXT: asr z0.h, z0.h, #15
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
; CHECK-NEXT: mov z0.h, #0 // =0x0
-; CHECK-NEXT: st1b { z0.h }, p0, [x0]
+; CHECK-NEXT: st1b { z0.h }, p1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: masked_store_v4i8:
@@ -72,9 +72,9 @@ define void @masked_store_v8i8(ptr %dst, <8 x i1> %mask) {
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: lsl z0.b, z0.b, #7
; CHECK-NEXT: asr z0.b, z0.b, #7
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0
; CHECK-NEXT: mov z0.b, #0 // =0x0
-; CHECK-NEXT: st1b { z0.b }, p0, [x0]
+; CHECK-NEXT: st1b { z0.b }, p1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: masked_store_v8i8:
@@ -165,9 +165,9 @@ define void @masked_store_v16i8(ptr %dst, <16 x i1> %mask) {
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: lsl z0.b, z0.b, #7
; CHECK-NEXT: asr z0.b, z0.b, #7
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0
; CHECK-NEXT: mov z0.b, #0 // =0x0
-; CHECK-NEXT: st1b { z0.b }, p0, [x0]
+; CHECK-NEXT: st1b { z0.b }, p1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: masked_store_v16i8:
@@ -360,10 +360,10 @@ define void @masked_store_v32i8(ptr %dst, <32 x i1> %mask) {
; CHECK-NEXT: asr z0.b, z0.b, #7
; CHECK-NEXT: asr z1.b, z1.b, #7
; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0
-; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0
+; CHECK-NEXT: cmpne p2.b, p0/z, z1.b, #0
; CHECK-NEXT: mov z0.b, #0 // =0x0
; CHECK-NEXT: st1b { z0.b }, p1, [x0, x8]
-; CHECK-NEXT: st1b { z0.b }, p0, [x0]
+; CHECK-NEXT: st1b { z0.b }, p2, [x0]
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
;
@@ -598,9 +598,9 @@ define void @masked_store_v2f16(ptr %dst, <2 x i1> %mask) {
; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
; CHECK-NEXT: lsl z0.h, z0.h, #15
; CHECK-NEXT: asr z0.h, z0.h, #15
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
; CHECK-NEXT: mov z0.h, #0 // =0x0
-; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: masked_store_v2f16:
@@ -638,9 +638,9 @@ define void @masked_store_v4f16(ptr %dst, <4 x i1> %mask) {
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: lsl z0.h, z0.h, #15
; CHECK-NEXT: asr z0.h, z0.h, #15
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
; CHECK-NEXT: mov z0.h, #0 // =0x0
-; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: masked_store_v4f16:
@@ -699,9 +699,9 @@ define void @masked_store_v8f16(ptr %dst, <8 x i1> %mask) {
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: lsl z0.h, z0.h, #15
; CHECK-NEXT: asr z0.h, z0.h, #15
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
; CHECK-NEXT: mov z0.h, #0 // =0x0
-; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: st1h { z0.h }, p1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: masked_store_v8f16:
@@ -807,11 +807,11 @@ define void @masked_store_v16f16(ptr %dst, <16 x i1> %mask) {
; CHECK-NEXT: lsl z1.h, z1.h, #15
; CHECK-NEXT: asr z0.h, z0.h, #15
; CHECK-NEXT: asr z1.h, z1.h, #15
+; CHECK-NEXT: cmpne p2.h, p0/z, z0.h, #0
; CHECK-NEXT: cmpne p1.h, p0/z, z1.h, #0
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT: mov z1.h, #0 // =0x0
; CHECK-NEXT: st1h { z1.h }, p1, [x0, x8, lsl #1]
-; CHECK-NEXT: st1h { z1.h }, p0, [x0]
+; CHECK-NEXT: st1h { z1.h }, p2, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: masked_store_v16f16:
@@ -958,9 +958,9 @@ define void @masked_store_v4f32(ptr %dst, <4 x i1> %mask) {
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: lsl z0.s, z0.s, #31
; CHECK-NEXT: asr z0.s, z0.s, #31
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
; CHECK-NEXT: mov z0.s, #0 // =0x0
-; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: st1w { z0.s }, p1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: masked_store_v4f32:
@@ -1034,9 +1034,9 @@ define void @masked_store_v8f32(ptr %dst, <8 x i1> %mask) {
; CHECK-NEXT: asr z0.s, z0.s, #31
; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
; CHECK-NEXT: mov z1.s, #0 // =0x0
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: cmpne p2.s, p0/z, z0.s, #0
; CHECK-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2]
-; CHECK-NEXT: st1w { z1.s }, p0, [x0]
+; CHECK-NEXT: st1w { z1.s }, p2, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: masked_store_v8f32:
@@ -1128,9 +1128,9 @@ define void @masked_store_v2f64(ptr %dst, <2 x i1> %mask) {
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: lsl z0.d, z0.d, #63
; CHECK-NEXT: asr z0.d, z0.d, #63
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
; CHECK-NEXT: mov z0.d, #0 // =0x0
-; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: st1d { z0.d }, p1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: masked_store_v2f64:
@@ -1174,11 +1174,11 @@ define void @masked_store_v4f64(ptr %dst, <4 x i1> %mask) {
; CHECK-NEXT: lsl z1.d, z1.d, #63
; CHECK-NEXT: asr z0.d, z0.d, #63
; CHECK-NEXT: asr z1.d, z1.d, #63
+; CHECK-NEXT: cmpne p2.d, p0/z, z0.d, #0
; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT: mov z1.d, #0 // =0x0
; CHECK-NEXT: st1d { z1.d }, p1, [x0, x8, lsl #3]
-; CHECK-NEXT: st1d { z1.d }, p0, [x0]
+; CHECK-NEXT: st1d { z1.d }, p2, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: masked_store_v4f64:
diff --git a/llvm/test/CodeGen/AArch64/sve-trunc.ll b/llvm/test/CodeGen/AArch64/sve-trunc.ll
index 42500b53222a2..1deb6b1343194 100644
--- a/llvm/test/CodeGen/AArch64/sve-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-trunc.ll
@@ -158,8 +158,8 @@ define <vscale x 4 x i1> @trunc_i64toi1_split(<vscale x 4 x i64> %in) {
; CHECK-NEXT: and z0.d, z0.d, #0x1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: uzp1 p0.s, p0.s, p1.s
+; CHECK-NEXT: cmpne p2.d, p0/z, z0.d, #0
+; CHECK-NEXT: uzp1 p0.s, p2.s, p1.s
; CHECK-NEXT: ret
entry:
%out = trunc <vscale x 4 x i64> %in to <vscale x 4 x i1>
@@ -276,8 +276,8 @@ define <vscale x 1 x i1> @trunc_nxv1i32_to_nxv1i1(<vscale x 1 x i32> %in) {
; CHECK: // %bb.0:
; CHECK-NEXT: and z0.s, z0.s, #0x1
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: punpklo p0.h, p1.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ret
%out = trunc <vscale x 1 x i32> %in to <vscale x 1 x i1>
diff --git a/llvm/test/CodeGen/AArch64/sve-ucmp.ll b/llvm/test/CodeGen/AArch64/sve-ucmp.ll
index 0ee31821e56ef..daf97cc5f40aa 100644
--- a/llvm/test/CodeGen/AArch64/sve-ucmp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ucmp.ll
@@ -8,9 +8,9 @@ define <vscale x 8 x i8> @u_nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
; CHECK-NEXT: and z1.h, z1.h, #0xff
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: cmphi p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: cmphi p0.h, p0/z, z1.h, z0.h
+; CHECK-NEXT: cmphi p2.h, p0/z, z1.h, z0.h
; CHECK-NEXT: mov z0.h, p1/z, #1 // =0x1
-; CHECK-NEXT: mov z0.h, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.h, p2/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
entry:
%c = call <vscale x 8 x i8> @llvm.ucmp(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b)
@@ -22,9 +22,9 @@ define <vscale x 16 x i8> @u_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cmphi p1.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
+; CHECK-NEXT: cmphi p2.b, p0/z, z1.b, z0.b
; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1
-; CHECK-NEXT: mov z0.b, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.b, p2/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
entry:
%c = call <vscale x 16 x i8> @llvm.ucmp(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -38,9 +38,9 @@ define <vscale x 4 x i16> @u_nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %
; CHECK-NEXT: and z1.s, z1.s, #0xffff
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmphi p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: cmphi p0.s, p0/z, z1.s, z0.s
+; CHECK-NEXT: cmphi p2.s, p0/z, z1.s, z0.s
; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1
-; CHECK-NEXT: mov z0.s, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.s, p2/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
entry:
%c = call <vscale x 4 x i16> @llvm.ucmp(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b)
@@ -52,9 +52,9 @@ define <vscale x 8 x i16> @u_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: cmphi p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: cmphi p0.h, p0/z, z1.h, z0.h
+; CHECK-NEXT: cmphi p2.h, p0/z, z1.h, z0.h
; CHECK-NEXT: mov z0.h, p1/z, #1 // =0x1
-; CHECK-NEXT: mov z0.h, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.h, p2/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
entry:
%c = call <vscale x 8 x i16> @llvm.ucmp(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -86,9 +86,9 @@ define <vscale x 2 x i32> @u_nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cmphi p1.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: cmphi p0.d, p0/z, z1.d, z0.d
+; CHECK-NEXT: cmphi p2.d, p0/z, z1.d, z0.d
; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1
-; CHECK-NEXT: mov z0.d, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.d, p2/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
entry:
%c = call <vscale x 2 x i32> @llvm.ucmp(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b)
@@ -100,9 +100,9 @@ define <vscale x 4 x i32> @u_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmphi p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: cmphi p0.s, p0/z, z1.s, z0.s
+; CHECK-NEXT: cmphi p2.s, p0/z, z1.s, z0.s
; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1
-; CHECK-NEXT: mov z0.s, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.s, p2/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
entry:
%c = call <vscale x 4 x i32> @llvm.ucmp(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -132,9 +132,9 @@ define <vscale x 2 x i64> @u_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cmphi p1.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: cmphi p0.d, p0/z, z1.d, z0.d
+; CHECK-NEXT: cmphi p2.d, p0/z, z1.d, z0.d
; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1
-; CHECK-NEXT: mov z0.d, p0/m, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.d, p2/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
entry:
%c = call <vscale x 2 x i64> @llvm.ucmp(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
diff --git a/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll b/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
index 818f37c85ffdb..754c8a17cee43 100644
--- a/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
@@ -11,8 +11,8 @@ define <vscale x 2 x i8> @umulo_nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: lsr z1.d, z0.d, #8
-; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
+; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 2 x i8>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y)
%b = extractvalue { <vscale x 2 x i8>, <vscale x 2 x i1> } %a, 0
@@ -31,8 +31,8 @@ define <vscale x 4 x i8> @umulo_nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: lsr z1.s, z0.s, #8
-; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 4 x i8>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %y)
%b = extractvalue { <vscale x 4 x i8>, <vscale x 4 x i1> } %a, 0
@@ -51,8 +51,8 @@ define <vscale x 8 x i8> @umulo_nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: lsr z1.h, z0.h, #8
-; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0
-; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.h, p0/z, z1.h, #0
+; CHECK-NEXT: mov z0.h, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 8 x i8>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y)
%b = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i1> } %a, 0
@@ -70,8 +70,8 @@ define <vscale x 16 x i8> @umulo_nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: umulh z2.b, p0/m, z2.b, z1.b
; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: mov z0.b, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.b, p0/z, z2.b, #0
+; CHECK-NEXT: mov z0.b, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 16 x i8>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y)
%b = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i1> } %a, 0
@@ -93,8 +93,8 @@ define <vscale x 32 x i8> @umulo_nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i
; CHECK-NEXT: mul z1.b, p0/m, z1.b, z3.b
; CHECK-NEXT: mul z0.b, p0/m, z0.b, z2.b
; CHECK-NEXT: cmpne p1.b, p0/z, z4.b, #0
-; CHECK-NEXT: cmpne p0.b, p0/z, z5.b, #0
-; CHECK-NEXT: mov z0.b, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p2.b, p0/z, z5.b, #0
+; CHECK-NEXT: mov z0.b, p2/m, #0 // =0x0
; CHECK-NEXT: mov z1.b, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 32 x i8>, <vscale x 32 x i1> } @llvm.umul.with.overflow.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %y)
@@ -148,8 +148,8 @@ define <vscale x 2 x i16> @umulo_nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: lsr z1.d, z0.d, #16
-; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
+; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 2 x i16>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %y)
%b = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i1> } %a, 0
@@ -168,8 +168,8 @@ define <vscale x 4 x i16> @umulo_nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: lsr z1.s, z0.s, #16
-; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 4 x i16>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %y)
%b = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i1> } %a, 0
@@ -187,8 +187,8 @@ define <vscale x 8 x i16> @umulo_nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: umulh z2.h, p0/m, z2.h, z1.h
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: mov z0.h, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 8 x i16>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y)
%b = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i1> } %a, 0
@@ -210,8 +210,8 @@ define <vscale x 16 x i16> @umulo_nxv16i16(<vscale x 16 x i16> %x, <vscale x 16
; CHECK-NEXT: mul z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h
; CHECK-NEXT: cmpne p1.h, p0/z, z4.h, #0
-; CHECK-NEXT: cmpne p0.h, p0/z, z5.h, #0
-; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p2.h, p0/z, z5.h, #0
+; CHECK-NEXT: mov z0.h, p2/m, #0 // =0x0
; CHECK-NEXT: mov z1.h, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 16 x i16>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %y)
@@ -265,8 +265,8 @@ define <vscale x 2 x i32> @umulo_nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i3
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: lsr z1.d, z0.d, #32
-; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
+; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 2 x i32>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y)
%b = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i1> } %a, 0
@@ -284,8 +284,8 @@ define <vscale x 4 x i32> @umulo_nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: umulh z2.s, p0/m, z2.s, z1.s
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.s, p0/z, z2.s, #0
+; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 4 x i32>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y)
%b = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i1> } %a, 0
@@ -307,8 +307,8 @@ define <vscale x 8 x i32> @umulo_nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i3
; CHECK-NEXT: mul z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT: cmpne p1.s, p0/z, z4.s, #0
-; CHECK-NEXT: cmpne p0.s, p0/z, z5.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p2.s, p0/z, z5.s, #0
+; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0
; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 8 x i32>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y)
@@ -361,8 +361,8 @@ define <vscale x 2 x i64> @umulo_nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: umulh z2.d, p0/m, z2.d, z1.d
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 2 x i64>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y)
%b = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i1> } %a, 0
@@ -384,8 +384,8 @@ define <vscale x 4 x i64> @umulo_nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i6
; CHECK-NEXT: mul z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z2.d
; CHECK-NEXT: cmpne p1.d, p0/z, z4.d, #0
-; CHECK-NEXT: cmpne p0.d, p0/z, z5.d, #0
-; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
+; CHECK-NEXT: cmpne p2.d, p0/z, z5.d, #0
+; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0
; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 4 x i64>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y)
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
index ece7bef54ddea..8e15385b5783b 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
@@ -197,8 +197,8 @@ define <4 x i32> @test_compress_v4i32_with_sve(<4 x i32> %vec, <4 x i1> %mask) {
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; CHECK-NEXT: compact z0.s, p1, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> %mask, <4 x i32> poison)
@@ -213,8 +213,8 @@ define <4 x i16> @test_compress_v4i16_with_sve(<4 x i16> %vec, <4 x i1> %mask) {
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: cmlt v1.4h, v1.4h, #0
; CHECK-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; CHECK-NEXT: compact z0.s, p1, z0.s
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%out = call <4 x i16> @llvm.experimental.vector.compress(<4 x i16> %vec, <4 x i1> %mask, <4 x i16> poison)
@@ -229,8 +229,8 @@ define <4 x half> @test_compress_v4f16_with_sve(<4 x half> %vec, <4 x i1> %mask)
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: cmlt v1.4h, v1.4h, #0
; CHECK-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; CHECK-NEXT: compact z0.s, p1, z0.s
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%out = call <4 x half> @llvm.experimental.vector.compress(<4 x half> %vec, <4 x i1> %mask, <4 x half> poison)
@@ -245,8 +245,8 @@ define <4 x bfloat> @test_compress_v4bf16_with_sve(<4 x bfloat> %vec, <4 x i1> %
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: cmlt v1.4h, v1.4h, #0
; CHECK-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; CHECK-NEXT: compact z0.s, p1, z0.s
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%out = call <4 x bfloat> @llvm.experimental.vector.compress(<4 x bfloat> %vec, <4 x i1> %mask, <4 x bfloat> poison)
@@ -261,8 +261,8 @@ define <1 x i32> @test_compress_v1i32_with_sve(<1 x i32> %vec, <1 x i1> %mask) {
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: mov v1.s[0], w8
-; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; CHECK-NEXT: compact z0.s, p1, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%out = call <1 x i32> @llvm.experimental.vector.compress(<1 x i32> %vec, <1 x i1> %mask, <1 x i32> poison)
@@ -287,11 +287,11 @@ define <4 x double> @test_compress_v4f64_with_sve(<4 x double> %vec, <4 x i1> %m
; CHECK-NEXT: shl v4.2d, v5.2d, #63
; CHECK-NEXT: addp v2.2s, v2.2s, v2.2s
; CHECK-NEXT: cmpne p1.d, p0/z, z3.d, #0
-; CHECK-NEXT: cmpne p0.d, p0/z, z4.d, #0
+; CHECK-NEXT: cmpne p2.d, p0/z, z4.d, #0
; CHECK-NEXT: fmov w8, s2
; CHECK-NEXT: compact z0.d, p1, z0.d
; CHECK-NEXT: and x8, x8, #0x3
-; CHECK-NEXT: compact z1.d, p0, z1.d
+; CHECK-NEXT: compact z1.d, p2, z1.d
; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: str q0, [sp]
; CHECK-NEXT: str q1, [x9, x8]
@@ -307,8 +307,8 @@ define <2 x i16> @test_compress_v2i16_with_sve(<2 x i16> %vec, <2 x i1> %mask) {
; CHECK-NEXT: shl v1.2s, v1.2s, #31
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; CHECK-NEXT: compact z0.s, p1, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%out = call <2 x i16> @llvm.experimental.vector.compress(<2 x i16> %vec, <2 x i1> %mask, <2 x i16> poison)
More information about the llvm-commits
mailing list