[llvm-branch-commits] [llvm] c73ae74 - [AArch64][SVE] Add optimization to remove redundant ptest instructions
Bradley Smith via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Jan 5 07:35:11 PST 2021
Author: Bradley Smith
Date: 2021-01-05T15:28:36Z
New Revision: c73ae747cb0c5546ac4a91a9609d5407e5b03897
URL: https://github.com/llvm/llvm-project/commit/c73ae747cb0c5546ac4a91a9609d5407e5b03897
DIFF: https://github.com/llvm/llvm-project/commit/c73ae747cb0c5546ac4a91a9609d5407e5b03897.diff
LOG: [AArch64][SVE] Add optimization to remove redundant ptest instructions
Co-Authored-by: Graham Hunter <graham.hunter at arm.com>
Co-Authored-by: Paul Walker <paul.walker at arm.com>
Differential Revision: https://reviews.llvm.org/D93292
Added:
llvm/test/CodeGen/AArch64/sve-ptest-removal-brk.ll
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir
llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir
llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir
llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir
llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir
llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir
llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir
llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir
llvm/test/CodeGen/AArch64/sve-ptest-removal-whilerw.mir
llvm/test/CodeGen/AArch64/sve-ptest-removal-whilewr.mir
Modified:
llvm/lib/Target/AArch64/AArch64InstrFormats.td
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.h
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/sve-setcc.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 3335071fe487..cf08f56e5b08 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -60,10 +60,14 @@ class AArch64Inst<Format f, string cstr> : Instruction {
bits<2> Form = F.Value;
// Defaults
+ bit isWhile = 0;
+ bit isPTestLike = 0;
FalseLanesEnum FalseLanes = FalseLanesNone;
DestructiveInstTypeEnum DestructiveInstType = NotDestructive;
ElementSizeEnum ElementSize = ElementSizeNone;
+ let TSFlags{10} = isPTestLike;
+ let TSFlags{9} = isWhile;
let TSFlags{8-7} = FalseLanes.Value;
let TSFlags{6-3} = DestructiveInstType.Value;
let TSFlags{2-0} = ElementSize.Value;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index ecd0c074e73e..3dbda0de75a1 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1119,6 +1119,13 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
switch (MI.getOpcode()) {
default:
break;
+ case AArch64::PTEST_PP:
+ SrcReg = MI.getOperand(0).getReg();
+ SrcReg2 = MI.getOperand(1).getReg();
+ // Not sure about the mask and value for now...
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
case AArch64::SUBSWrr:
case AArch64::SUBSWrs:
case AArch64::SUBSWrx:
@@ -1290,6 +1297,127 @@ static bool areCFlagsAccessedBetweenInstrs(
return false;
}
+/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
+/// operation which could set the flags in an identical manner
+bool AArch64InstrInfo::optimizePTestInstr(
+ MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
+ const MachineRegisterInfo *MRI) const {
+ auto *Mask = MRI->getUniqueVRegDef(MaskReg);
+ auto *Pred = MRI->getUniqueVRegDef(PredReg);
+ auto NewOp = Pred->getOpcode();
+ bool OpChanged = false;
+
+ unsigned MaskOpcode = Mask->getOpcode();
+ unsigned PredOpcode = Pred->getOpcode();
+ bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
+ bool PredIsWhileLike = isWhileOpcode(PredOpcode);
+
+ if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) {
+ // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't
+ // deactivate any lanes OTHER_INST might set.
+ uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode);
+ uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
+
+ // Must be an all active predicate of matching element size.
+ if ((PredElementSize != MaskElementSize) ||
+ (Mask->getOperand(1).getImm() != 31))
+ return false;
+
+ // Fallthough to simply remove the PTEST.
+ } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) {
+ // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
+ // instruction that sets the flags as PTEST would.
+
+ // Fallthough to simply remove the PTEST.
+ } else if (PredIsPTestLike) {
+ // For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both
+ // instructions use the same predicate.
+ auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+ if (Mask != PTestLikeMask)
+ return false;
+
+ // Fallthough to simply remove the PTEST.
+ } else {
+ switch (Pred->getOpcode()) {
+ case AArch64::BRKB_PPzP:
+ case AArch64::BRKPB_PPzPP: {
+ // Op 0 is chain, 1 is the mask, 2 the previous predicate to
+ // propagate, 3 the new predicate.
+
+ // Check to see if our mask is the same as the brkpb's. If
+ // not the resulting flag bits may be different and we
+ // can't remove the ptest.
+ auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+ if (Mask != PredMask)
+ return false;
+
+ // Switch to the new opcode
+ NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP
+ : AArch64::BRKPBS_PPzPP;
+ OpChanged = true;
+ break;
+ }
+ case AArch64::BRKN_PPzP: {
+ auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+ if (Mask != PredMask)
+ return false;
+
+ NewOp = AArch64::BRKNS_PPzP;
+ OpChanged = true;
+ break;
+ }
+ default:
+ // Bail out if we don't recognize the input
+ return false;
+ }
+ }
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+ // If the predicate is in a different block (possibly because its been
+ // hoisted out), then assume the flags are set in between statements.
+ if (Pred->getParent() != PTest->getParent())
+ return false;
+
+ // If another instruction between the propagation and test sets the
+ // flags, don't remove the ptest.
+ MachineBasicBlock::iterator I = Pred, E = PTest;
+ ++I; // Skip past the predicate op itself.
+ for (; I != E; ++I) {
+ const MachineInstr &Inst = *I;
+
+ // TODO: If the ptest flags are unused, we could still remove it.
+ if (Inst.modifiesRegister(AArch64::NZCV, TRI))
+ return false;
+ }
+
+ // If we pass all the checks, it's safe to remove the PTEST and use the flags
+ // as they are prior to PTEST. Sometimes this requires the tested PTEST
+ // operand to be replaced with an equivalent instruction that also sets the
+ // flags.
+ Pred->setDesc(get(NewOp));
+ PTest->eraseFromParent();
+ if (OpChanged) {
+ bool succeeded = UpdateOperandRegClass(*Pred);
+ (void)succeeded;
+ assert(succeeded && "Operands have incompatible register classes!");
+ Pred->addRegisterDefined(AArch64::NZCV, TRI);
+ }
+
+ // Ensure that the flags def is live.
+ if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
+ unsigned i = 0, e = Pred->getNumOperands();
+ for (; i != e; ++i) {
+ MachineOperand &MO = Pred->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
+ MO.setIsDead(false);
+ break;
+ }
+ }
+ }
+ return true;
+}
+
/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It can be truly compare
/// instruction
@@ -1328,6 +1456,9 @@ bool AArch64InstrInfo::optimizeCompareInstr(
return true;
}
+ if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
+ return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
+
// Continue only if we have a "ri" where immediate is zero.
// FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
// function.
@@ -7042,6 +7173,14 @@ uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
return get(Opc).TSFlags & AArch64::ElementSizeMask;
}
+bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
+ return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
+}
+
+bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
+ return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
+}
+
unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
return AArch64::BLRNoIP;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index b45ae92df960..7434987e0617 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -280,6 +280,12 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
/// Returns the vector element size (B, H, S or D) of an SVE opcode.
uint64_t getElementSizeForOpcode(unsigned Opc) const;
+ /// Returns true if the opcode is for an SVE instruction that sets the
+ /// condition codes as if it's results had been fed to a PTEST instruction
+ /// along with the same general predicate.
+ bool isPTestLikeOpcode(unsigned Opc) const;
+ /// Returns true if the opcode is for an SVE WHILE## instruction.
+ bool isWhileOpcode(unsigned Opc) const;
/// Returns true if the instruction has a shift by immediate that can be
/// executed in one cycle less.
static bool isFalkorShiftExtFast(const MachineInstr &MI);
@@ -328,6 +334,12 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
/// Returns an unused general-purpose register which can be used for
/// constructing an outlined call if one exists. Returns 0 otherwise.
unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
+
+ /// Remove a ptest of a predicate-generating operation that already sets, or
+ /// can be made to set, the condition codes in an identical manner
+ bool optimizePTestInstr(MachineInstr *PTest, unsigned MaskReg,
+ unsigned PredReg,
+ const MachineRegisterInfo *MRI) const;
};
/// Return true if there is an instruction /after/ \p DefMI and before \p UseMI
@@ -411,6 +423,18 @@ static inline bool isIndirectBranchOpcode(int Opc) {
return false;
}
+static inline bool isPTrueOpcode(unsigned Opc) {
+ switch (Opc) {
+ case AArch64::PTRUE_B:
+ case AArch64::PTRUE_H:
+ case AArch64::PTRUE_S:
+ case AArch64::PTRUE_D:
+ return true;
+ default:
+ return false;
+ }
+}
+
/// Return opcode to be used for indirect calls.
unsigned getBLRCallOpcode(const MachineFunction &MF);
@@ -418,6 +442,7 @@ unsigned getBLRCallOpcode(const MachineFunction &MF);
#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits
#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 4-bit
#define TSFLAG_FALSE_LANE_TYPE(X) ((X) << 7) // 2-bits
+#define TSFLAG_INSTR_FLAGS(X) ((X) << 9) // 2-bits
// }
namespace AArch64 {
@@ -450,9 +475,14 @@ enum FalseLaneType {
FalseLanesUndef = TSFLAG_FALSE_LANE_TYPE(0x2),
};
+// NOTE: This is a bit field.
+static const uint64_t InstrFlagIsWhile = TSFLAG_INSTR_FLAGS(0x1);
+static const uint64_t InstrFlagIsPTestLike = TSFLAG_INSTR_FLAGS(0x2);
+
#undef TSFLAG_ELEMENT_SIZE_TYPE
#undef TSFLAG_DESTRUCTIVE_INST_TYPE
#undef TSFLAG_FALSE_LANE_TYPE
+#undef TSFLAG_INSTR_FLAGS
int getSVEPseudoMap(uint16_t Opcode);
int getSVERevInstr(uint16_t Opcode);
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 8208eb42dbfa..1020a81a3494 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -280,6 +280,7 @@ class sve_int_ptrue<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty,
let Inst{3-0} = Pd;
let Defs = !if(!eq (opc{0}, 1), [NZCV], []);
+ let ElementSize = pprty.ElementSize;
let isReMaterializable = 1;
}
@@ -532,6 +533,7 @@ class sve_int_ptest<bits<6> opc, string asm>
let Inst{4-0} = 0b00000;
let Defs = [NZCV];
+ let isCompare = 1;
}
class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm,
@@ -4285,6 +4287,8 @@ class sve_int_cmp<bit cmp_1, bits<2> sz8_64, bits<3> opc, string asm,
let Inst{3-0} = Pd;
let Defs = [NZCV];
+ let ElementSize = pprty.ElementSize;
+ let isPTestLike = 1;
}
multiclass SVE_SETCC_Pat<CondCode cc, CondCode invcc, ValueType predvt,
@@ -4357,6 +4361,7 @@ class sve_int_scmp_vi<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty,
let Defs = [NZCV];
let ElementSize = pprty.ElementSize;
+ let isPTestLike = 1;
}
multiclass SVE_SETCC_Imm_Pat<CondCode cc, CondCode commuted_cc,
@@ -4416,6 +4421,8 @@ class sve_int_ucmp_vi<bits<2> sz8_64, bits<2> opc, string asm, PPRRegOp pprty,
let Inst{3-0} = Pd;
let Defs = [NZCV];
+ let ElementSize = pprty.ElementSize;
+ let isPTestLike = 1;
}
multiclass sve_int_ucmp_vi<bits<2> opc, string asm, CondCode cc,
@@ -4478,6 +4485,8 @@ class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
let Inst{3-0} = Pd;
let Defs = [NZCV];
+ let ElementSize = pprty.ElementSize;
+ let isWhile = 1;
}
multiclass sve_int_while4_rr<bits<3> opc, string asm, SDPatternOperator op> {
@@ -4522,6 +4531,8 @@ class sve2_int_while_rr<bits<2> sz8_64, bits<1> rw, string asm,
let Inst{3-0} = Pd;
let Defs = [NZCV];
+ let ElementSize = pprty.ElementSize;
+ let isWhile = 1;
}
multiclass sve2_int_while_rr<bits<1> rw, string asm, string op> {
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-brk.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-brk.ll
new file mode 100644
index 000000000000..a593b98c030e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-brk.ll
@@ -0,0 +1,79 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s
+
+; Test that redundant ptest instruction is removed when using a flag setting brk
+
+define i32 @brkpb(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: brkpb:
+; CHECK: brkpbs p0.b, p0/z, p1.b, p2.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkpb.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+define i32 @brkb(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
+; CHECK-LABEL: brkb:
+; CHECK: brkbs p0.b, p0/z, p1.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+define i32 @brkn(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: brkn:
+; CHECK: brkns p2.b, p0/z, p1.b, p2.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkn.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+; Test that ptest instruction is not removed when using a non-flag setting brk
+
+define i32 @brkpb_neg(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: brkpb_neg:
+; CHECK: brkpb p0.b, p0/z, p1.b, p2.b
+; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkpb.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+define i32 @brkb_neg(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
+; CHECK-LABEL: brkb_neg:
+; CHECK: brkb p0.b, p0/z, p1.b
+; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+define i32 @brkn_neg(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: brkn_neg:
+; CHECK: brkn p2.b, p0/z, p1.b, p2.b
+; CHECK-NEXT: ptest p1, p2.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkn.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.brkpb.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.brkn.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
new file mode 100644
index 000000000000..157a73b4f06e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s
+
+;
+; Compares
+;
+
+define i32 @cmpeq_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmpeq_nxv16i8:
+; CHECK: cmpeq p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+;
+; Immediate Compares
+;
+
+define i32 @cmpeq_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: cmpeq_imm_nxv16i8:
+; CHECK: cmpeq p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
+ %conv = zext i1 %3 to i32
+ ret i32 %conv
+}
+
+;
+; Wide Compares
+;
+
+define i32 @cmpeq_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpeq_wide_nxv16i8:
+; CHECK: cmpeq p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+
+declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
new file mode 100644
index 000000000000..b41411121e4c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
@@ -0,0 +1,534 @@
+# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s
+
+# Test instruction sequences where PTEST is redundant and thus gets removed.
+---
+name: cmpeq_nxv16i8
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: ppr_3b }
+ - { id: 1, class: zpr }
+ - { id: 2, class: zpr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$p0', virtual-reg: '%0' }
+ - { reg: '$z0', virtual-reg: '%1' }
+ - { reg: '$z1', virtual-reg: '%2' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ liveins: $p0, $z0, $z1
+
+ ; Here we check the expected sequence with subsequent tests
+ ; just asserting there is no PTEST instruction.
+ ;
+ ; CHECK-LABEL: name: cmpeq_nxv16i8
+ ; CHECK: %3:ppr = CMPEQ_PPzZZ_B %0, %1, %2, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %2:zpr = COPY $z1
+ %1:zpr = COPY $z0
+ %0:ppr_3b = COPY $p0
+ %3:ppr = CMPEQ_PPzZZ_B %0, %1, %2, implicit-def dead $nzcv
+ PTEST_PP %0, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: cmpeq_nxv8i16
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: ppr_3b }
+ - { id: 1, class: zpr }
+ - { id: 2, class: zpr }
+ - { id: 3, class: ppr_3b }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$p0', virtual-reg: '%0' }
+ - { reg: '$z0', virtual-reg: '%1' }
+ - { reg: '$z1', virtual-reg: '%2' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ liveins: $p0, $z0, $z1
+
+ ; CHECK-LABEL: name: cmpeq_nxv8i16
+ ; CHECK-NOT: PTEST
+ %2:zpr = COPY $z1
+ %1:zpr = COPY $z0
+ %0:ppr_3b = COPY $p0
+ %4:ppr = CMPEQ_PPzZZ_H %0, %1, %2, implicit-def dead $nzcv
+ PTEST_PP %0, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: cmpeq_nxv4i32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: ppr_3b }
+ - { id: 1, class: zpr }
+ - { id: 2, class: zpr }
+ - { id: 3, class: ppr_3b }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$p0', virtual-reg: '%0' }
+ - { reg: '$z0', virtual-reg: '%1' }
+ - { reg: '$z1', virtual-reg: '%2' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ liveins: $p0, $z0, $z1
+
+ ; CHECK-LABEL: name: cmpeq_nxv4i32
+ ; CHECK-NOT: PTEST
+ %2:zpr = COPY $z1
+ %1:zpr = COPY $z0
+ %0:ppr_3b = COPY $p0
+ %4:ppr = CMPEQ_PPzZZ_S %0, %1, %2, implicit-def dead $nzcv
+ PTEST_PP %0, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: cmpeq_nxv2i64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: ppr_3b }
+ - { id: 1, class: zpr }
+ - { id: 2, class: zpr }
+ - { id: 3, class: ppr_3b }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$p0', virtual-reg: '%0' }
+ - { reg: '$z0', virtual-reg: '%1' }
+ - { reg: '$z1', virtual-reg: '%2' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ liveins: $p0, $z0, $z1
+
+ ; CHECK-LABEL: name: cmpeq_nxv2i64
+ ; CHECK-NOT: PTEST
+ %2:zpr = COPY $z1
+ %1:zpr = COPY $z0
+ %0:ppr_3b = COPY $p0
+ %4:ppr = CMPEQ_PPzZZ_D %0, %1, %2, implicit-def dead $nzcv
+ PTEST_PP %0, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: cmpeq_imm_nxv16i8
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: ppr_3b }
+ - { id: 1, class: zpr }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$p0', virtual-reg: '%0' }
+ - { reg: '$z0', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ liveins: $p0, $z0
+
+ ; CHECK-LABEL: name: cmpeq_imm_nxv16i8
+ ; CHECK-NOT: PTEST
+ %1:zpr = COPY $z0
+ %0:ppr_3b = COPY $p0
+ %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv
+ %3:ppr = PTRUE_B 31
+ PTEST_PP killed %3, killed %2, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: cmpeq_imm_nxv8i16
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: ppr_3b }
+ - { id: 1, class: zpr }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: gpr32 }
+ - { id: 6, class: gpr32 }
+liveins:
+ - { reg: '$p0', virtual-reg: '%0' }
+ - { reg: '$z0', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ liveins: $p0, $z0
+
+ ; CHECK-LABEL: name: cmpeq_imm_nxv8i16
+ ; CHECK-NOT: PTEST
+ %1:zpr = COPY $z0
+ %0:ppr_3b = COPY $p0
+ %2:ppr = CMPEQ_PPzZI_H %0, %1, 0, implicit-def dead $nzcv
+ PTEST_PP %0, %2, implicit-def $nzcv
+ %5:gpr32 = COPY $wzr
+ %6:gpr32 = CSINCWr %5, $wzr, 0, implicit $nzcv
+ $w0 = COPY %6
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: cmpeq_imm_nxv4i32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: ppr_3b }
+ - { id: 1, class: zpr }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: gpr32 }
+ - { id: 6, class: gpr32 }
+liveins:
+ - { reg: '$p0', virtual-reg: '%0' }
+ - { reg: '$z0', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ liveins: $p0, $z0
+
+ ; CHECK-LABEL: name: cmpeq_imm_nxv4i32
+ ; CHECK-NOT: PTEST
+ %1:zpr = COPY $z0
+ %0:ppr_3b = COPY $p0
+ %2:ppr = CMPEQ_PPzZI_S %0, %1, 0, implicit-def dead $nzcv
+ PTEST_PP %0, %2, implicit-def $nzcv
+ %5:gpr32 = COPY $wzr
+ %6:gpr32 = CSINCWr %5, $wzr, 0, implicit $nzcv
+ $w0 = COPY %6
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: cmpeq_imm_nxv2i64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: ppr_3b }
+ - { id: 1, class: zpr }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: gpr32 }
+ - { id: 6, class: gpr32 }
+liveins:
+ - { reg: '$p0', virtual-reg: '%0' }
+ - { reg: '$z0', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ liveins: $p0, $z0
+
+ ; CHECK-LABEL: name: cmpeq_imm_nxv2i64
+ ; CHECK-NOT: PTEST
+ %1:zpr = COPY $z0
+ %0:ppr_3b = COPY $p0
+ %2:ppr = CMPEQ_PPzZI_D %0, %1, 0, implicit-def dead $nzcv
+ PTEST_PP %0, %2, implicit-def $nzcv
+ %5:gpr32 = COPY $wzr
+ %6:gpr32 = CSINCWr %5, $wzr, 0, implicit $nzcv
+ $w0 = COPY %6
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: cmpeq_wide_nxv16i8
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: ppr_3b }
+ - { id: 1, class: zpr }
+ - { id: 2, class: zpr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$p0', virtual-reg: '%0' }
+ - { reg: '$z0', virtual-reg: '%1' }
+ - { reg: '$z1', virtual-reg: '%2' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ liveins: $p0, $z0, $z1
+
+ ; CHECK-LABEL: name: cmpeq_wide_nxv16i8
+ ; CHECK-NOT: PTEST
+ %2:zpr = COPY $z1
+ %1:zpr = COPY $z0
+ %0:ppr_3b = COPY $p0
+ %3:ppr = CMPEQ_WIDE_PPzZZ_B %0, %1, %2, implicit-def dead $nzcv
+ PTEST_PP %0, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: cmpeq_wide_nxv8i16
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: ppr_3b }
+ - { id: 1, class: zpr }
+ - { id: 2, class: zpr }
+ - { id: 3, class: ppr_3b }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$p0', virtual-reg: '%0' }
+ - { reg: '$z0', virtual-reg: '%1' }
+ - { reg: '$z1', virtual-reg: '%2' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ liveins: $p0, $z0, $z1
+
+ ; CHECK-LABEL: name: cmpeq_wide_nxv8i16
+ ; CHECK-NOT: PTEST
+ %2:zpr = COPY $z1
+ %1:zpr = COPY $z0
+ %0:ppr_3b = COPY $p0
+ %4:ppr = CMPEQ_WIDE_PPzZZ_H %0, %1, %2, implicit-def dead $nzcv
+ PTEST_PP %0, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: cmpeq_wide_nxv4i32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: ppr_3b }
+ - { id: 1, class: zpr }
+ - { id: 2, class: zpr }
+ - { id: 3, class: ppr_3b }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$p0', virtual-reg: '%0' }
+ - { reg: '$z0', virtual-reg: '%1' }
+ - { reg: '$z1', virtual-reg: '%2' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ liveins: $p0, $z0, $z1
+
+ ; CHECK-LABEL: name: cmpeq_wide_nxv4i32
+ ; CHECK-NOT: PTEST
+ %2:zpr = COPY $z1
+ %1:zpr = COPY $z0
+ %0:ppr_3b = COPY $p0
+ %4:ppr = CMPEQ_WIDE_PPzZZ_S %0, %1, %2, implicit-def dead $nzcv
+ PTEST_PP %0, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: cmpeq_imm_nxv16i8_ptest_not_all_active
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: ppr_3b }
+ - { id: 1, class: zpr }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$p0', virtual-reg: '%0' }
+ - { reg: '$z0', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ liveins: $p0, $z0
+
+ ; CHECK-LABEL: name: cmpeq_imm_nxv16i8_ptest_not_all_active
+ ; CHECK: %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv
+ ; CHECK-NEXT: %3:ppr = PTRUE_B 0
+ ; CHECK-NEXT: PTEST_PP killed %3, killed %2, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:zpr = COPY $z0
+ %0:ppr_3b = COPY $p0
+ %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv
+ %3:ppr = PTRUE_B 0
+ PTEST_PP killed %3, killed %2, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: cmpeq_imm_nxv16i8_ptest_of_halfs
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: ppr_3b }
+ - { id: 1, class: zpr }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$p0', virtual-reg: '%0' }
+ - { reg: '$z0', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ liveins: $p0, $z0
+
+ ; CHECK-LABEL: name: cmpeq_imm_nxv16i8_ptest_of_halfs
+ ; CHECK: %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv
+ ; CHECK-NEXT: %3:ppr = PTRUE_H 31
+ ; CHECK-NEXT: PTEST_PP killed %3, killed %2, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:zpr = COPY $z0
+ %0:ppr_3b = COPY $p0
+ %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv
+ %3:ppr = PTRUE_H 31
+ PTEST_PP killed %3, killed %2, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: cmpeq_imm_nxv16i8_ptest_with_unique_pg
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: ppr_3b }
+ - { id: 1, class: zpr }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$p0', virtual-reg: '%0' }
+ - { reg: '$p1', virtual-reg: '%3' }
+ - { reg: '$z0', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ liveins: $p0, $p1, $z0
+
+ ; CHECK-LABEL: name: cmpeq_imm_nxv16i8_ptest_with_unique_pg
+ ; CHECK: %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv
+ ; CHECK-NEXT: %3:ppr = COPY $p1
+ ; CHECK-NEXT: PTEST_PP killed %3, killed %2, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:zpr = COPY $z0
+ %0:ppr_3b = COPY $p0
+ %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv
+ %3:ppr = COPY $p1
+ PTEST_PP killed %3, killed %2, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: cmpeq_nxv16i8_ptest_with_matching_operands
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: ppr_3b }
+ - { id: 1, class: zpr }
+ - { id: 2, class: zpr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$p0', virtual-reg: '%0' }
+ - { reg: '$z0', virtual-reg: '%1' }
+ - { reg: '$z1', virtual-reg: '%2' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ liveins: $p0, $z0, $z1
+
+ ; CHECK-LABEL: name: cmpeq_nxv16i8_ptest_with_matching_operands
+ ; CHECK-NOT: PTEST
+ %2:zpr = COPY $z1
+ %1:zpr = COPY $z0
+ %0:ppr_3b = COPY $p0
+ %3:ppr = CMPEQ_PPzZZ_B %0, %1, %2, implicit-def dead $nzcv
+ PTEST_PP %3, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
new file mode 100644
index 000000000000..6363c3deeba1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s
+
+;
+; Compares
+;
+
+define i32 @cmpge_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmpge_nxv16i8:
+; CHECK: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+;
+; Immediate Compares
+;
+
+define i32 @cmpge_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: cmpge_imm_nxv16i8:
+; CHECK: cmpge p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
+ %conv = zext i1 %3 to i32
+ ret i32 %conv
+}
+
+;
+; Wide Compares
+;
+
+define i32 @cmpge_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpge_wide_nxv16i8:
+; CHECK: cmpge p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+
+declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
new file mode 100644
index 000000000000..4d3c7e04f696
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s
+
+;
+; Compares
+;
+
+define i32 @cmpgt_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmpgt_nxv16i8:
+; CHECK: cmpgt p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+;
+; Immediate Compares
+;
+
+define i32 @cmpgt_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: cmpgt_imm_nxv16i8:
+; CHECK: cmpgt p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
+ %conv = zext i1 %3 to i32
+ ret i32 %conv
+}
+
+;
+; Wide Compares
+;
+
+define i32 @cmpgt_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpgt_wide_nxv16i8:
+; CHECK: cmpgt p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+
+declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
new file mode 100644
index 000000000000..5bba0b48cb30
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s
+
+;
+; Compares
+;
+
+define i32 @cmphi_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmphi_nxv16i8:
+; CHECK: cmphi p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+;
+; Immediate Compares
+;
+
+define i32 @cmphi_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: cmphi_imm_nxv16i8:
+; CHECK: cmphi p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
+ %conv = zext i1 %3 to i32
+ ret i32 %conv
+}
+
+;
+; Wide Compares
+;
+
+define i32 @cmphi_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmphi_wide_nxv16i8:
+; CHECK: cmphi p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+
+declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
new file mode 100644
index 000000000000..ff5a1ec09abf
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s
+
+;
+; Compares
+;
+
+define i32 @cmphs_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmphs_nxv16i8:
+; CHECK: cmphs p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+;
+; Immediate Compares
+;
+
+define i32 @cmphs_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: cmphs_imm_nxv16i8:
+; CHECK: cmphs p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
+ %conv = zext i1 %3 to i32
+ ret i32 %conv
+}
+
+;
+; Wide Compares
+;
+
+define i32 @cmphs_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmphs_wide_nxv16i8:
+; CHECK: cmphs p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+
+declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
new file mode 100644
index 000000000000..3513acef7bbc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s
+
+;
+; Immediate Compares
+;
+
+define i32 @cmple_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: cmple_imm_nxv16i8:
+; CHECK: cmple p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
+ %conv = zext i1 %3 to i32
+ ret i32 %conv
+}
+
+;
+; Wide Compares
+;
+
+define i32 @cmple_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmple_wide_nxv16i8:
+; CHECK: cmple p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+
+declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll
new file mode 100644
index 000000000000..eae748d56e05
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s
+
+;
+; Immediate Compares
+;
+
+define i32 @cmplo_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: cmplo_imm_nxv16i8:
+; CHECK: cmplo p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
+ %conv = zext i1 %3 to i32
+ ret i32 %conv
+}
+
+;
+; Wide Compares
+;
+
+define i32 @cmplo_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmplo_wide_nxv16i8:
+; CHECK: cmplo p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmplo.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmplo.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+
+declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll
new file mode 100644
index 000000000000..d53ece953cbf
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s
+
+;
+; Immediate Compares
+;
+
+define i32 @cmpls_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: cmpls_imm_nxv16i8:
+; CHECK: cmpls p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
+ %conv = zext i1 %3 to i32
+ ret i32 %conv
+}
+
+;
+; Wide Compares
+;
+
+define i32 @cmpls_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpls_wide_nxv16i8:
+; CHECK: cmpls p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpls.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpls.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+
+declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll
new file mode 100644
index 000000000000..fca33d72bce4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s
+
+;
+; Immediate Compares
+;
+
+define i32 @cmplt_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: cmplt_imm_nxv16i8:
+; CHECK: cmplt p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
+ %conv = zext i1 %3 to i32
+ ret i32 %conv
+}
+
+;
+; Wide Compares
+;
+
+define i32 @cmplt_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmplt_wide_nxv16i8:
+; CHECK: cmplt p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmplt.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmplt.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+
+declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
new file mode 100644
index 000000000000..ead20da2827a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s
+
+;
+; Compares
+;
+
+define i32 @cmpne_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmpne_nxv16i8:
+; CHECK: cmpne p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+;
+; Immediate Compares
+;
+
+define i32 @cmpne_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: cmpne_imm_nxv16i8:
+; CHECK: cmpne p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %3 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
+ %conv = zext i1 %3 to i32
+ ret i32 %conv
+}
+
+;
+; Wide Compares
+;
+
+define i32 @cmpne_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpne_wide_nxv16i8:
+; CHECK: cmpne p0.b, p0/z, z0.b, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+ %conv = zext i1 %2 to i32
+ ret i32 %conv
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+
+declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir
new file mode 100644
index 000000000000..eaec3fb6303c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir
@@ -0,0 +1,444 @@
+# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s
+
+# Test instruction sequences where PTEST is redundant and thus gets removed.
+---
+name: whilege_b8_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; Here we check the expected sequence with subsequent tests
+ ; just asserting there is no PTEST instruction.
+ ;
+ ; CHECK-LABEL: name: whilege_b8_s32
+ ; CHECK: %3:ppr = WHILEGE_PWW_B %0, %1, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILEGE_PWW_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilege_b8_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilege_b8_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilege_b16_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilege_b16_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_H 31
+ %4:ppr = WHILEGE_PWW_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilege_b16_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilege_b16_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_H 31
+ %4:ppr = WHILEGE_PXX_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilege_b32_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilege_b32_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_S 31
+ %4:ppr = WHILEGE_PWW_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilege_b32_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilege_b32_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_S 31
+ %4:ppr = WHILEGE_PXX_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilege_b64_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilege_b64_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_D 31
+ %4:ppr = WHILEGE_PWW_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilege_b64_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilege_b64_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_D 31
+ %4:ppr = WHILEGE_PXX_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilege_b8_s64_keep_ptest_not_all_active
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILEGE, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilege_b8_s64_keep_ptest_not_all_active
+ ; CHECK: %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_B 0
+ %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilege_b8_s64_keep_ptest_of_halfs
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILEGE, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilege_b8_s64_keep_ptest_of_halfs
+ ; CHECK: %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_H 31
+ %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilege_b8_s64_keep_ptest_of_words
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILEGE, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilege_b8_s64_keep_ptest_of_words
+ ; CHECK: %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_S 31
+ %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilege_b8_s64_keep_ptest_of_doublewords
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILEGE, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilege_b8_s64_keep_ptest_of_doublewords
+ ; CHECK: %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_D 31
+ %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir
new file mode 100644
index 000000000000..c48df413e81f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir
@@ -0,0 +1,475 @@
+# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s
+
+# Test instruction sequences where PTEST is redundant and thus gets removed.
+---
+name: whilegt_b8_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; Here we check the expected sequence, with subsequent tests
+ ; just asserting there is no PTEST instruction.
+ ;
+ ; CHECK-LABEL: name: whilegt_b8_s32
+ ; CHECK: %3:ppr = WHILEGT_PWW_B %0, %1, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILEGT_PWW_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilegt_b8_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilegt_b8_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILEGT_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilegt_b16_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilegt_b16_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_H 31
+ %4:ppr = WHILEGT_PWW_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilegt_b16_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilegt_b16_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_H 31
+ %4:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilegt_b32_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilegt_b32_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_S 31
+ %4:ppr = WHILEGT_PWW_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilegt_b32_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilegt_b32_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_S 31
+ %4:ppr = WHILEGT_PXX_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilegt_b64_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilegt_b64_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_D 31
+ %4:ppr = WHILEGT_PWW_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilegt_b64_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilegt_b64_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_D 31
+ %4:ppr = WHILEGT_PXX_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilegt_b16_s64_keep_ptest_not_all_active
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILEGT, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilegt_b16_s64_keep_ptest_not_all_active
+ ; CHECK: %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_H 1
+ %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilegt_b16_s64_keep_ptest_of_bytes
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILEGT, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilegt_b16_s64_keep_ptest_of_bytes
+ ; CHECK: %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilegt_b16_s64_keep_ptest_of_words
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILEGT, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilegt_b16_s64_keep_ptest_of_words
+ ; CHECK: %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_S 31
+ %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilegt_b16_s64_keep_ptest_of_doublewords
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILEGT, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilegt_b16_s64_keep_ptest_of_doublewords
+ ; CHECK: %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_D 31
+ %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilegt_b8_s32_ptest_with_matching_operands
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: gpr32 }
+ - { id: 4, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilegt_b8_s32_ptest_with_matching_operands
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = WHILEGT_PWW_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, killed %2, implicit-def $nzcv
+ %3:gpr32 = COPY $wzr
+ %4:gpr32 = CSINCWr %3, $wzr, 0, implicit $nzcv
+ $w0 = COPY %4
+ RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir
new file mode 100644
index 000000000000..f186e825c14a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir
@@ -0,0 +1,444 @@
+# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s
+
+# Test instruction sequences where PTEST is redundant and thus gets removed.
+---
+name: whilehi_b8_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; Here we check the expected sequence, with subsequent tests
+ ; just asserting there is no PTEST instruction.
+ ;
+ ; CHECK-LABEL: name: whilehi_b8_s32
+ ; CHECK: %3:ppr = WHILEHI_PWW_B %0, %1, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILEHI_PWW_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehi_b8_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilehi_b8_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILEHI_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehi_b16_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilehi_b16_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_H 31
+ %4:ppr = WHILEHI_PWW_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehi_b16_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilehi_b16_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_H 31
+ %4:ppr = WHILEHI_PXX_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehi_b32_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilehi_b32_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_S 31
+ %4:ppr = WHILEHI_PWW_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehi_b32_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilehi_b32_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_S 31
+ %4:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehi_b64_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilehi_b64_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_D 31
+ %4:ppr = WHILEHI_PWW_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehi_b64_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilehi_b64_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_D 31
+ %4:ppr = WHILEHI_PXX_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehi_b32_s64_keep_ptest_not_all_active
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILEHI, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilehi_b32_s64_keep_ptest_not_all_active
+ ; CHECK: %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_S 29
+ %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehi_b32_s64_keep_ptest_of_bytes
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILEHI, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilehi_b32_s64_keep_ptest_of_bytes
+ ; CHECK: %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehi_b32_s64_keep_ptest_of_halfs
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILEHI, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilehi_b32_s64_keep_ptest_of_halfs
+ ; CHECK: %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_H 31
+ %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehi_b32_s64_keep_ptest_of_doublewords
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILEHI, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilehi_b32_s64_keep_ptest_of_doublewords
+ ; CHECK: %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_D 31
+ %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir
new file mode 100644
index 000000000000..1c35524f37a9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir
@@ -0,0 +1,444 @@
+# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s
+
+# Test instruction sequences where PTEST is redundant and thus gets removed.
+---
+name: whilehs_b8_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; Here we check the expected sequence, with subsequent tests
+ ; just asserting there is no PTEST instruction.
+ ;
+ ; CHECK-LABEL: name: whilehs_b8_s32
+ ; CHECK: %3:ppr = WHILEHS_PWW_B %0, %1, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILEHS_PWW_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehs_b8_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilehs_b8_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILEHS_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehs_b16_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilehs_b16_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_H 31
+ %4:ppr = WHILEHS_PWW_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehs_b16_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilehs_b16_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_H 31
+ %4:ppr = WHILEHS_PXX_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehs_b32_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilehs_b32_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_S 31
+ %4:ppr = WHILEHS_PWW_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehs_b32_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilehs_b32_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_S 31
+ %4:ppr = WHILEHS_PXX_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehs_b64_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilehs_b64_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_D 31
+ %4:ppr = WHILEHS_PWW_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehs_b64_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilehs_b64_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_D 31
+ %4:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehs_b64_s64_keep_ptest_not_all_active
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all active predicate
+ ; of element size matching the WHILEHS, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilehs_b64_s64_keep_ptest_not_all_active
+ ; CHECK: %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_D 30
+ %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehs_b64_s64_keep_ptest_of_bytes
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all active predicate
+ ; of element size matching the WHILEHS, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilehs_b64_s64_keep_ptest_of_bytes
+ ; CHECK: %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehs_b64_s64_keep_ptest_of_halfs
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all active predicate
+ ; of element size matching the WHILEHS, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilehs_b64_s64_keep_ptest_of_halfs
+ ; CHECK: %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_H 31
+ %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilehs_b64_s64_keep_ptest_of_words
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all active predicate
+ ; of element size matching the WHILEHS, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilehs_b64_s64_keep_ptest_of_words
+ ; CHECK: %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_S 31
+ %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir
new file mode 100644
index 000000000000..32954d593c1d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir
@@ -0,0 +1,444 @@
+# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s
+
+# Test instruction sequences where PTEST is redundant and thus gets removed.
+---
+name: whilele_b8_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; Here we check the expected sequence with subsequent tests
+ ; just asserting there is no PTEST instruction.
+ ;
+ ; CHECK-LABEL: name: whilele_b8_s32
+ ; CHECK: %3:ppr = WHILELE_PWW_B %0, %1, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilele_b8_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilele_b8_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILELE_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilele_b16_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilele_b16_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_H 31
+ %4:ppr = WHILELE_PWW_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilele_b16_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilele_b16_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_H 31
+ %4:ppr = WHILELE_PXX_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilele_b32_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilele_b32_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_S 31
+ %4:ppr = WHILELE_PWW_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilele_b32_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilele_b32_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_S 31
+ %4:ppr = WHILELE_PXX_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilele_b64_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilele_b64_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_D 31
+ %4:ppr = WHILELE_PWW_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilele_b64_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilele_b64_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_D 31
+ %4:ppr = WHILELE_PXX_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilele_b8_s32_keep_ptest_not_all_active
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; PTEST is not redundant when its Pg operand is not an all active predicate
+ ; of element size matching the WHILELE, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilele_b8_s32_keep_ptest_not_all_active
+ ; CHECK: %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_B 7
+ %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilele_b8_s32_keep_ptest_of_halfs
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; PTEST is not redundant when its Pg operand is not an all active predicate
+ ; of element size matching the WHILELE, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilele_b8_s32_keep_ptest_of_halfs
+ ; CHECK: %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_H 31
+ %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilele_b8_s32_keep_ptest_of_words
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; PTEST is not redundant when its Pg operand is not an all active predicate
+ ; of element size matching the WHILELE, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilele_b8_s32_keep_ptest_of_words
+ ; CHECK: %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_S 31
+ %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilele_b8_s32_keep_ptest_of_doublewords
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; PTEST is not redundant when its Pg operand is not an all active predicate
+ ; of element size matching the WHILELE, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilele_b8_s32_keep_ptest_of_doublewords
+ ; CHECK: %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_D 31
+ %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir
new file mode 100644
index 000000000000..cca0ab8ef210
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir
@@ -0,0 +1,444 @@
+# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s
+
+# Test instruction sequences where PTEST is redundant and thus gets removed.
+---
+name: whilelo_b8_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; Here we check the expected sequence with subsequent tests
+ ; just asserting there is no PTEST instruction.
+ ;
+ ; CHECK-LABEL: name: whilelo_b8_s32
+ ; CHECK: %3:ppr = WHILELO_PWW_B %0, %1, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILELO_PWW_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelo_b8_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilelo_b8_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILELO_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelo_b16_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilelo_b16_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_H 31
+ %4:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelo_b16_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilelo_b16_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_H 31
+ %4:ppr = WHILELO_PXX_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelo_b32_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilelo_b32_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_S 31
+ %4:ppr = WHILELO_PWW_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelo_b32_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilelo_b32_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_S 31
+ %4:ppr = WHILELO_PXX_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelo_b64_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilelo_b64_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_D 31
+ %4:ppr = WHILELO_PWW_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelo_b64_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilelo_b64_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_D 31
+ %4:ppr = WHILELO_PXX_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelo_b16_s32_keep_ptest_not_all_active
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; PTEST is not redundant when its Pg operand is not an all active predicate
+ ; of element size matching the WHILELO, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilelo_b16_s32_keep_ptest_not_all_active
+ ; CHECK: %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_H 6
+ %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelo_b16_s32_keep_ptest_of_bytes
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; PTEST is not redundant when its Pg operand is not an all active predicate
+ ; of element size matching the WHILELO, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilelo_b16_s32_keep_ptest_of_bytes
+ ; CHECK: %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelo_b16_s32_keep_ptest_of_words
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; PTEST is not redundant when its Pg operand is not an all active predicate
+ ; of element size matching the WHILELO, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilelo_b16_s32_keep_ptest_of_words
+ ; CHECK: %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_S 31
+ %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelo_b16_s32_keep_ptest_of_doublewords
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; PTEST is not redundant when its Pg operand is not an all active predicate
+ ; of element size matching the WHILELO, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilelo_b16_s32_keep_ptest_of_doublewords
+ ; CHECK: %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_D 31
+ %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir
new file mode 100644
index 000000000000..4bae3a1986f4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir
@@ -0,0 +1,444 @@
+# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s
+
+# Test instruction sequences where PTEST is redundant and thus gets removed.
+---
+name: whilels_b8_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; Here we check the expected sequence with subsequent tests
+ ; just asserting there is no PTEST instruction.
+ ;
+ ; CHECK-LABEL: name: whilels_b8_s32
+ ; CHECK: %3:ppr = WHILELS_PWW_B %0, %1, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILELS_PWW_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilels_b8_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilels_b8_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILELS_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilels_b16_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilels_b16_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_H 31
+ %4:ppr = WHILELS_PWW_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilels_b16_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilels_b16_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_H 31
+ %4:ppr = WHILELS_PXX_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilels_b32_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilels_b32_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_S 31
+ %4:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilels_b32_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilels_b32_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_S 31
+ %4:ppr = WHILELS_PXX_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilels_b64_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilels_b64_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_D 31
+ %4:ppr = WHILELS_PWW_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilels_b64_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilels_b64_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_D 31
+ %4:ppr = WHILELS_PXX_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilels_b32_s32_keep_ptest_not_all_active
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILELS, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilels_b32_s32_keep_ptest_not_all_active
+ ; CHECK: %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_S 5
+ %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilels_b32_s32_keep_ptest_of_bytes
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILELS, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilels_b32_s32_keep_ptest_of_bytes
+ ; CHECK: %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilels_b32_s32_keep_ptest_of_halfs
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILELS, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilels_b32_s32_keep_ptest_of_halfs
+ ; CHECK: %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_H 31
+ %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilels_b32_s32_keep_ptest_of_doublewords
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILELS, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilels_b32_s32_keep_ptest_of_doublewords
+ ; CHECK: %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_D 31
+ %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir
new file mode 100644
index 000000000000..3c6a9e21b4c6
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir
@@ -0,0 +1,444 @@
+# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s
+
+# Test instruction sequences where PTEST is redundant and thus gets removed.
+---
+name: whilelt_b8_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; Here we check the expected sequence with subsequent tests
+ ; just asserting there is no PTEST instruction.
+ ;
+ ; CHECK-LABEL: name: whilelt_b8_s32
+ ; CHECK: %3:ppr = WHILELT_PWW_B %0, %1, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILELT_PWW_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelt_b8_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilelt_b8_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILELT_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelt_b16_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilelt_b16_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_H 31
+ %4:ppr = WHILELT_PWW_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelt_b16_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilelt_b16_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_H 31
+ %4:ppr = WHILELT_PXX_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelt_b32_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilelt_b32_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_S 31
+ %4:ppr = WHILELT_PWW_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelt_b32_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilelt_b32_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_S 31
+ %4:ppr = WHILELT_PXX_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelt_b64_s32
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: whilelt_b64_s32
+ ; CHECK-NOT: PTEST
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_D 31
+ %4:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelt_b64_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilelt_b64_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_D 31
+ %4:ppr = WHILELT_PXX_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelt_b64_s32_keep_ptest_not_all_active
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILELT, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilelt_b64_s32_keep_ptest_not_all_active
+ ; CHECK: %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_D 4
+ %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelt_b64_s32_keep_ptest_of_bytes
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILELT, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilelt_b64_s32_keep_ptest_of_bytes
+ ; CHECK: %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelt_b64_s32_keep_ptest_of_halfs
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILELT, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilelt_b64_s32_keep_ptest_of_halfs
+ ; CHECK: %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_H 31
+ %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilelt_b64_s32_keep_ptest_of_words
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $w0, $w1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILELT, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilelt_b64_s32_keep_ptest_of_words
+ ; CHECK: %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr32 = COPY $w1
+ %0:gpr32 = COPY $w0
+ %2:ppr = PTRUE_S 31
+ %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilerw.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilerw.mir
new file mode 100644
index 000000000000..7b67f8d2a299
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilerw.mir
@@ -0,0 +1,306 @@
+# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s
+
+# Test instruction sequences where PTEST is redundant and thus gets removed.
+---
+name: whilerw_b8_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; Here we check the expected sequence with subsequent tests
+ ; just asserting there is no PTEST instruction.
+ ;
+ ; CHECK-LABEL: name: whilerw_b8_s64
+ ; CHECK: %3:ppr = WHILERW_PXX_B %0, %1, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilerw_b16_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilerw_b16_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_H 31
+ %4:ppr = WHILERW_PXX_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilerw_b32_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilerw_b32_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_S 31
+ %4:ppr = WHILERW_PXX_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilerw_b64_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilerw_b64_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_D 31
+ %4:ppr = WHILERW_PXX_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilerw_b8_s64_keep_ptest_not_all_active
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILERW, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilerw_b8_s64_keep_ptest_not_all_active
+ ; CHECK: %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_B 0
+ %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilerw_b8_s64_keep_ptest_of_halfs
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILERW, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilerw_b8_s64_keep_ptest_of_halfs
+ ; CHECK: %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_H 31
+ %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilerw_b8_s64_keep_ptest_of_words
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILERW, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilerw_b8_s64_keep_ptest_of_words
+ ; CHECK: %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_S 31
+ %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilerw_b8_s64_keep_ptest_of_doublewords
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all-active predicate
+ ; of element size matching the WHILERW, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilerw_b8_s64_keep_ptest_of_doublewords
+ ; CHECK: %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_D 31
+ %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilewr.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilewr.mir
new file mode 100644
index 000000000000..ebf8e0771eee
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilewr.mir
@@ -0,0 +1,306 @@
+# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s
+
+# Test instruction sequences where PTEST is redundant and thus gets removed.
+---
+name: whilewr_b8_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; Here we check the expected sequence with subsequent tests
+ ; just asserting there is no PTEST instruction.
+ ;
+ ; CHECK-LABEL: name: whilewr_b8_s64
+ ; CHECK: %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_B 31
+ %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilewr_b16_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilewr_b16_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_H 31
+ %4:ppr = WHILEWR_PXX_H %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilewr_b32_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilewr_b32_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_S 31
+ %4:ppr = WHILEWR_PXX_S %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilewr_b64_s64
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: ppr }
+ - { id: 5, class: ppr }
+ - { id: 6, class: gpr32 }
+ - { id: 7, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: whilewr_b64_s64
+ ; CHECK-NOT: PTEST
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_D 31
+ %4:ppr = WHILEWR_PXX_D %0, %1, implicit-def dead $nzcv
+ PTEST_PP %2, %4, implicit-def $nzcv
+ %6:gpr32 = COPY $wzr
+ %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+ $w0 = COPY %7
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilewr_b8_s64_keep_ptest_not_all_active
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all active predicate
+ ; of element size matching the WHILEWR, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilewr_b8_s64_keep_ptest_not_all_active
+ ; CHECK: %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_B 0
+ %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilewr_b8_s64_keep_ptest_of_halfs
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all active predicate
+ ; of element size matching the WHILEWR, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilewr_b8_s64_keep_ptest_of_halfs
+ ; CHECK: %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_H 31
+ %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilewr_b8_s64_keep_ptest_of_words
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all active predicate
+ ; of element size matching the WHILEWR, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilewr_b8_s64_keep_ptest_of_words
+ ; CHECK: %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_S 31
+ %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: whilewr_b8_s64_keep_ptest_of_doublewords
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64 }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: ppr }
+ - { id: 3, class: ppr }
+ - { id: 4, class: gpr32 }
+ - { id: 5, class: gpr32 }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; PTEST is not redundant when its Pg operand is not an all active predicate
+ ; of element size matching the WHILEWR, which is the implicit predicate
+ ; used by WHILE when calculating the condition codes.
+ ;
+ ; CHECK-LABEL: name: whilewr_b8_s64_keep_ptest_of_doublewords
+ ; CHECK: %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv
+ ; CHECK-NEXT: PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ ; CHECK-NEXT: %4:gpr32 = COPY $wzr
+ ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %2:ppr = PTRUE_D 31
+ %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv
+ PTEST_PP killed %2, killed %3, implicit-def $nzcv
+ %4:gpr32 = COPY $wzr
+ %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-setcc.ll b/llvm/test/CodeGen/AArch64/sve-setcc.ll
index 3dbe0eb42283..c82771d59402 100644
--- a/llvm/test/CodeGen/AArch64/sve-setcc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-setcc.ll
@@ -8,7 +8,6 @@
define void @sve_cmplt_setcc_inverted(<vscale x 8 x i16>* %out, <vscale x 8 x i16> %in, <vscale x 8 x i1> %pg) {
; CHECK-LABEL: @sve_cmplt_setcc_inverted
; CHECK: cmplt p1.h, p0/z, z0.h, #0
-; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: b.ne
entry:
%0 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %in, <vscale x 2 x i64> zeroinitializer)
More information about the llvm-branch-commits
mailing list