[llvm] 4563024 - [Target][ARM] Adding MVE VPT Optimisation Pass
via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 14 07:16:51 PDT 2020
Author: Pierre-vh
Date: 2020-04-14T15:16:27+01:00
New Revision: 456302435625e83cf474a6a085732de31f1f2d16
URL: https://github.com/llvm/llvm-project/commit/456302435625e83cf474a6a085732de31f1f2d16
DIFF: https://github.com/llvm/llvm-project/commit/456302435625e83cf474a6a085732de31f1f2d16.diff
LOG: [Target][ARM] Adding MVE VPT Optimisation Pass
Differential Revision: https://reviews.llvm.org/D76709
Added:
llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
llvm/test/CodeGen/Thumb2/mve-vpt-blocks.ll
llvm/test/CodeGen/Thumb2/mve-vpt-optimisations.mir
Modified:
llvm/lib/Target/ARM/ARM.h
llvm/lib/Target/ARM/ARMTargetMachine.cpp
llvm/lib/Target/ARM/CMakeLists.txt
llvm/test/CodeGen/ARM/O3-pipeline.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h
index 3412813a3ef2..7398968bb24a 100644
--- a/llvm/lib/Target/ARM/ARM.h
+++ b/llvm/lib/Target/ARM/ARM.h
@@ -47,6 +47,7 @@ FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
FunctionPass *createMVEVPTBlockPass();
+FunctionPass *createMVEVPTOptimisationsPass();
FunctionPass *createARMOptimizeBarriersPass();
FunctionPass *createThumb2SizeReductionPass(
std::function<bool(const Function &)> Ftor = nullptr);
@@ -66,6 +67,7 @@ void initializeARMExpandPseudoPass(PassRegistry &);
void initializeThumb2SizeReducePass(PassRegistry &);
void initializeThumb2ITBlockPass(PassRegistry &);
void initializeMVEVPTBlockPass(PassRegistry &);
+void initializeMVEVPTOptimisationsPass(PassRegistry &);
void initializeARMLowOverheadLoopsPass(PassRegistry &);
void initializeMVETailPredicationPass(PassRegistry &);
void initializeMVEGatherScatterLoweringPass(PassRegistry &);
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 947bbc3ad3ee..680f2989bcd8 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -96,6 +96,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
initializeARMExpandPseudoPass(Registry);
initializeThumb2SizeReducePass(Registry);
initializeMVEVPTBlockPass(Registry);
+ initializeMVEVPTOptimisationsPass(Registry);
initializeMVETailPredicationPass(Registry);
initializeARMLowOverheadLoopsPass(Registry);
initializeMVEGatherScatterLoweringPass(Registry);
@@ -487,6 +488,8 @@ bool ARMPassConfig::addGlobalInstructionSelect() {
void ARMPassConfig::addPreRegAlloc() {
if (getOptLevel() != CodeGenOpt::None) {
+ addPass(createMVEVPTOptimisationsPass());
+
addPass(createMLxExpansionPass());
if (EnableARMLoadStoreOpt)
diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt
index 7591701857b9..dcc3a7d4c6b6 100644
--- a/llvm/lib/Target/ARM/CMakeLists.txt
+++ b/llvm/lib/Target/ARM/CMakeLists.txt
@@ -54,6 +54,7 @@ add_llvm_target(ARMCodeGen
MVEGatherScatterLowering.cpp
MVETailPredication.cpp
MVEVPTBlockPass.cpp
+ MVEVPTOptimisationsPass.cpp
Thumb1FrameLowering.cpp
Thumb1InstrInfo.cpp
ThumbRegisterInfo.cpp
diff --git a/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
new file mode 100644
index 000000000000..710e8dacaee1
--- /dev/null
+++ b/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
@@ -0,0 +1,232 @@
+//===-- MVEVPTOptimisationsPass.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass does a few optimisations related to MVE VPT blocks before
+/// register allocation is performed. The goal is to maximize the sizes of the
+/// blocks that will be created by the MVE VPT Block Insertion pass (which runs
+/// after register allocation). Currently, this pass replaces VCMPs with VPNOTs
+/// when possible, so the Block Insertion pass can delete them later to create
+/// larger VPT blocks.
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/Debug.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-mve-vpt-opts"
+
+namespace {
+class MVEVPTOptimisations : public MachineFunctionPass {
+public:
+ static char ID;
+ const Thumb2InstrInfo *TII;
+ MachineRegisterInfo *MRI;
+
+ MVEVPTOptimisations() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ StringRef getPassName() const override {
+ return "ARM MVE VPT Optimisation Pass";
+ }
+
+private:
+ bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
+};
+
+char MVEVPTOptimisations::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(MVEVPTOptimisations, DEBUG_TYPE,
+ "ARM MVE VPT Optimisations pass", false, false)
+
+// Returns true if Opcode is any VCMP Opcode.
+static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
+
+// Returns true if a VCMP with this Opcode can have its operands swapped.
+// There are two kinds of VCMP that can't have their operands swapped: float
+// VCMPs and VCMPr instructions (since the r is always on the right).
+static bool CanHaveSwappedOperands(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return true;
+ case ARM::MVE_VCMPf32:
+ case ARM::MVE_VCMPf16:
+ case ARM::MVE_VCMPf32r:
+ case ARM::MVE_VCMPf16r:
+ case ARM::MVE_VCMPi8r:
+ case ARM::MVE_VCMPi16r:
+ case ARM::MVE_VCMPi32r:
+ case ARM::MVE_VCMPu8r:
+ case ARM::MVE_VCMPu16r:
+ case ARM::MVE_VCMPu32r:
+ case ARM::MVE_VCMPs8r:
+ case ARM::MVE_VCMPs16r:
+ case ARM::MVE_VCMPs32r:
+ return false;
+ }
+}
+
+// Returns the CondCode of a VCMP Instruction.
+static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) {
+ assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
+ return ARMCC::CondCodes(Instr.getOperand(3).getImm());
+}
+
+// Returns true if Cond is equivalent to a VPNOT instruction on the result of
+// Prev. Cond and Prev must be VCMPs.
+static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) {
+ assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));
+
+ // Opcodes must match.
+ if (Cond.getOpcode() != Prev.getOpcode())
+ return false;
+
+ MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
+ MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);
+
+ // If the VCMP has the opposite condition with the same operands, we can
+  // replace it with a VPNOT.
+ ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
+ ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
+ if (ExpectedCode == GetCondCode(Prev))
+ if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
+ return true;
+ // Check again with operands swapped if possible
+ if (!CanHaveSwappedOperands(Cond.getOpcode()))
+ return false;
+ ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
+ return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
+ CondOP2.isIdenticalTo(PrevOP1);
+}
+
+// Returns true if Instr writes to VCCR.
+static bool IsWritingToVCCR(MachineInstr &Instr) {
+ if (Instr.getNumOperands() == 0)
+ return false;
+ MachineOperand &Dst = Instr.getOperand(0);
+ if (!Dst.isReg())
+ return false;
+ Register DstReg = Dst.getReg();
+ if (!DstReg.isVirtual())
+ return false;
+ MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
+ const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
+ return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
+}
+
+// This optimisation replaces VCMPs with VPNOTs when they are equivalent.
+bool MVEVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
+ SmallVector<MachineInstr *, 4> DeadInstructions;
+
+ // The last VCMP that we have seen and that couldn't be replaced.
+ // This is reset when an instruction that writes to VCCR/VPR is found, or when
+ // a VCMP is replaced with a VPNOT.
+ // We'll only replace VCMPs with VPNOTs when this is not null, and when the
+ // current VCMP is the opposite of PrevVCMP.
+ MachineInstr *PrevVCMP = nullptr;
+ // If we find an instruction that kills the result of PrevVCMP, we save the
+ // operand here to remove the kill flag in case we need to use PrevVCMP's
+ // result.
+ MachineOperand *PrevVCMPResultKiller = nullptr;
+
+ for (MachineInstr &Instr : MBB.instrs()) {
+ if (PrevVCMP) {
+ if (MachineOperand *MO = Instr.findRegisterUseOperand(
+ PrevVCMP->getOperand(0).getReg(), /*isKill*/ true)) {
+        // If we come across the instr that kills PrevVCMP's result, record it
+ // so we can remove the kill flag later if we need to.
+ PrevVCMPResultKiller = MO;
+ }
+ }
+
+ // Ignore predicated instructions.
+ if (getVPTInstrPredicate(Instr) != ARMVCC::None)
+ continue;
+
+ // Only look at VCMPs
+ if (!IsVCMP(Instr.getOpcode())) {
+ // If the instruction writes to VCCR, forget the previous VCMP.
+ if (IsWritingToVCCR(Instr))
+ PrevVCMP = nullptr;
+ continue;
+ }
+
+ if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
+ PrevVCMP = &Instr;
+ continue;
+ }
+
+ // The register containing the result of the VCMP that we're going to
+ // replace.
+ Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();
+
+ // Build a VPNOT to replace the VCMP, reusing its operands.
+ MachineInstrBuilder MIBuilder =
+ BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
+ .add(Instr.getOperand(0))
+ .addReg(PrevVCMPResultReg);
+ addUnpredicatedMveVpredNOp(MIBuilder);
+ LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
+ MIBuilder.getInstr()->dump(); dbgs() << " Removed VCMP: ";
+ Instr.dump());
+
+    // If we found an instruction that uses and kills PrevVCMP's result,
+ // remove the kill flag.
+ if (PrevVCMPResultKiller)
+ PrevVCMPResultKiller->setIsKill(false);
+
+ // Finally, mark the old VCMP for removal and reset
+ // PrevVCMP/PrevVCMPResultKiller.
+ DeadInstructions.push_back(&Instr);
+ PrevVCMP = nullptr;
+ PrevVCMPResultKiller = nullptr;
+ }
+
+ for (MachineInstr *DeadInstruction : DeadInstructions)
+ DeadInstruction->removeFromParent();
+
+ return !DeadInstructions.empty();
+}
+
+bool MVEVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
+ const ARMSubtarget &STI =
+ static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+
+ if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
+ return false;
+
+ TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
+ MRI = &Fn.getRegInfo();
+
+ LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
+ << "********** Function: " << Fn.getName() << '\n');
+
+ bool Modified = false;
+ for (MachineBasicBlock &MBB : Fn)
+ Modified |= ReplaceVCMPsByVPNOTs(MBB);
+
+ LLVM_DEBUG(dbgs() << "**************************************\n");
+ return Modified;
+}
+
+/// createMVEVPTOptimisationsPass
+FunctionPass *llvm::createMVEVPTOptimisationsPass() {
+ return new MVEVPTOptimisations();
+}
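For readers following IsVPNOTEquivalent and CanHaveSwappedOperands above, here is a small standalone C++ sketch of the same decision, written against a simplified instruction model rather than LLVM's MachineInstr API. The enum values mirror the ARMCC::CondCodes immediates that appear in the MIR test further down (10 = GE, 11 = LT, 12 = GT); the VCmp struct and all helper names here are invented purely for illustration.

// Standalone sketch (not LLVM code) of the check that decides whether a VCMP
// computes the logical negation of a previous VCMP and can become a VPNOT.
#include <cassert>

enum CondCode { EQ = 0, NE = 1, GE = 10, LT = 11, GT = 12, LE = 13 };

// Condition that holds exactly when the original condition does not.
static CondCode oppositeCondition(CondCode CC) {
  switch (CC) {
  case EQ: return NE;
  case NE: return EQ;
  case GE: return LT;
  case LT: return GE;
  case GT: return LE;
  case LE: return GT;
  }
  return CC;
}

// Condition that is equivalent once the two compare operands are swapped.
static CondCode swappedCondition(CondCode CC) {
  switch (CC) {
  case GE: return LE;
  case LE: return GE;
  case GT: return LT;
  case LT: return GT;
  default: return CC; // EQ/NE are symmetric.
  }
}

struct VCmp {
  int LHS, RHS;    // stand-ins for the two compare operands
  CondCode CC;
  bool FloatOrReg; // float VCMPs and VCMPr forms cannot swap their operands
};

// Returns true if Cond computes the negation of Prev, i.e. it could be
// replaced by "VPNOT <result of Prev>".
static bool isVPNOTEquivalent(const VCmp &Cond, const VCmp &Prev) {
  CondCode Expected = oppositeCondition(Cond.CC);
  if (Expected == Prev.CC && Cond.LHS == Prev.LHS && Cond.RHS == Prev.RHS)
    return true;
  if (Cond.FloatOrReg) // the swapped-operand rule is not allowed here
    return false;
  return swappedCondition(Expected) == Prev.CC && Cond.LHS == Prev.RHS &&
         Cond.RHS == Prev.LHS;
}

int main() {
  // VCMP.s32 ge q0, q1 followed by VCMP.s32 lt q0, q1: same operands,
  // opposite condition -> the second compare becomes a VPNOT.
  assert(isVPNOTEquivalent({0, 1, LT, false}, {0, 1, GE, false}));
  // VCMP.s32 ge q0, q1 followed by VCMP.s32 gt q1, q0: swapped operands
  // with the swapped-opposite condition -> also a VPNOT candidate.
  assert(isVPNOTEquivalent({1, 0, GT, false}, {0, 1, GE, false}));
  // Float compares never use the swapped-operand rule.
  assert(!isVPNOTEquivalent({1, 0, GT, true}, {0, 1, GE, true}));
  return 0;
}

The swapped-operand rule is what lets the pass also fold pairs such as VCMP.s32 ge q0, q1 / VCMP.s32 gt q1, q0, as exercised by the vcmp_with_opposite_cond_and_swapped_operands test below.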
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index 31a437d90144..f37b0863f8d7 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -92,6 +92,7 @@
; CHECK-NEXT: Machine code sinking
; CHECK-NEXT: Peephole Optimizations
; CHECK-NEXT: Remove dead machine instructions
+; CHECK-NEXT: MVE VPT Optimisation Pass
; CHECK-NEXT: ARM MLA / MLS expansion pass
; CHECK-NEXT: ARM pre- register allocation load / store optimization pass
; CHECK-NEXT: ARM A15 S->D optimizer
diff --git a/llvm/test/CodeGen/Thumb2/mve-vpt-blocks.ll b/llvm/test/CodeGen/Thumb2/mve-vpt-blocks.ll
new file mode 100644
index 000000000000..94374308e7d6
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-vpt-blocks.ll
@@ -0,0 +1,323 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=thumbv8.1m.main-arm-none-eabi --verify-machineinstrs -mattr=+mve.fp %s -o - | FileCheck %s
+
+declare <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>)
+
+define arm_aapcs_vfpcc <4 x i32> @vpt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: vpt_block:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vpt.s32 ge, q0, q2
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp sge <4 x i32> %a, %c
+ %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vptt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: vptt_block:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov q3, q0
+; CHECK-NEXT: vptt.s32 ge, q0, q2
+; CHECK-NEXT: vorrt q3, q1, q2
+; CHECK-NEXT: vorrt q0, q3, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp sge <4 x i32> %a, %c
+ %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
+ %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %1, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vpttt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: vpttt_block:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vpttt.s32 ge, q0, q2
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp sge <4 x i32> %a, %c
+ %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
+ %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
+ %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2)
+ ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vptttt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: vptttt_block:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vptttt.s32 ge, q0, q2
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp sge <4 x i32> %a, %c
+ %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
+ %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
+ %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2)
+ %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
+ ret <4 x i32> %4
+}
+
+
+define arm_aapcs_vfpcc <4 x i32> @vpte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: vpte_block:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vpte.s32 ge, q0, q2
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: vmove q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp sge <4 x i32> %a, %c
+ %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
+ %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
+ %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
+ ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vptte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: vptte_block:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vptte.s32 ge, q0, q2
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: vorre q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp sge <4 x i32> %a, %c
+ %1 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
+ %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
+ %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2)
+ %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %3)
+ ret <4 x i32> %4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vptee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: vptee_block:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vptee.s32 ge, q0, q2
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: vorre q0, q1, q2
+; CHECK-NEXT: vorre q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp sge <4 x i32> %a, %c
+ %1 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
+ %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
+ %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %2)
+ %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %3)
+ ret <4 x i32> %4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vptet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: vptet_block:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: vcmp.s32 ge, q0, q2
+; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
+; CHECK-NEXT: vpnot
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp sge <4 x i32> %a, %c
+ %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
+ %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
+ %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
+ %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
+ ret <4 x i32> %4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vpttet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: vpttet_block:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: vcmp.s32 ge, q0, q2
+; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
+; CHECK-NEXT: vpstt
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
+; CHECK-NEXT: vpnot
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp sge <4 x i32> %a, %c
+ %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
+ %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
+ %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
+ %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
+ %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4)
+ ret <4 x i32> %5
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vptett_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: vptett_block:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: vcmp.s32 ge, q0, q2
+; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
+; CHECK-NEXT: vpnot
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
+; CHECK-NEXT: vpstt
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp sge <4 x i32> %a, %c
+ %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
+ %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
+ %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
+ %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
+ %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4)
+ ret <4 x i32> %5
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vpteet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: vpteet_block:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: vcmp.s32 ge, q0, q2
+; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
+; CHECK-NEXT: vpnot
+; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp sge <4 x i32> %a, %c
+ %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
+ %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
+ %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
+ %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
+ %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4)
+ ret <4 x i32> %5
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vpteee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: vpteee_block:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vpteee.s32 ge, q0, q2
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: vmove q0, q2
+; CHECK-NEXT: vmove q0, q2
+; CHECK-NEXT: vmove q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp sge <4 x i32> %a, %c
+ %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
+ %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
+ %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
+ %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
+ %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
+ ret <4 x i32> %5
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vptete_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: vptete_block:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .pad #8
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: vcmp.s32 ge, q0, q2
+; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
+; CHECK-NEXT: vpnot
+; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp sge <4 x i32> %a, %c
+ %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
+ %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
+ %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
+ %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
+ %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
+ ret <4 x i32> %5
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vpttte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: vpttte_block:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vpttte.s32 ge, q0, q2
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: vmove q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp sge <4 x i32> %a, %c
+ %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
+ %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
+ %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
+ %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
+ %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
+ ret <4 x i32> %5
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vpttee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: vpttee_block:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vpttee.s32 ge, q0, q2
+; CHECK-NEXT: vorrt q0, q1, q2
+; CHECK-NEXT: vmovt q0, q2
+; CHECK-NEXT: vmove q0, q2
+; CHECK-NEXT: vmove q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = icmp sge <4 x i32> %a, %c
+ %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
+ %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
+ %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
+ %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
+ %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
+ ret <4 x i32> %5
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vpt-optimisations.mir b/llvm/test/CodeGen/Thumb2/mve-vpt-optimisations.mir
new file mode 100644
index 000000000000..496774335438
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-vpt-optimisations.mir
@@ -0,0 +1,547 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass arm-mve-vpt-opts %s -o - | FileCheck %s
+
+--- |
+ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbv8.1m.main-arm-none-eabi"
+
+ ; Functions are intentionally left blank - see the MIR sequences below.
+
+ define arm_aapcs_vfpcc <4 x float> @vcmp_with_opposite_cond(<4 x float> %inactive1) #0 {
+ entry:
+ ret <4 x float> %inactive1
+ }
+
+ define arm_aapcs_vfpcc <4 x float> @vcmp_with_opposite_cond_and_swapped_operands(<4 x float> %inactive1) #0 {
+ entry:
+ ret <4 x float> %inactive1
+ }
+
+ define arm_aapcs_vfpcc <4 x float> @triple_vcmp(<4 x float> %inactive1) #0 {
+ entry:
+ ret <4 x float> %inactive1
+ }
+
+ define arm_aapcs_vfpcc <4 x float> @killed_vccr_values(<4 x float> %inactive1) #0 {
+ entry:
+ ret <4 x float> %inactive1
+ }
+
+ define arm_aapcs_vfpcc <4 x float> @predicated_vcmps(<4 x float> %inactive1) #0 {
+ entry:
+ ret <4 x float> %inactive1
+ }
+
+ define arm_aapcs_vfpcc <4 x float> @flt_with_swapped_operands(<4 x float> %inactive1) #0 {
+ entry:
+ ret <4 x float> %inactive1
+ }
+
+  define arm_aapcs_vfpcc <4 x float> @different_opcodes(<4 x float> %inactive1) #0 {
+ entry:
+ ret <4 x float> %inactive1
+ }
+
+ define arm_aapcs_vfpcc <4 x float> @incorrect_condcode(<4 x float> %inactive1) #0 {
+ entry:
+ ret <4 x float> %inactive1
+ }
+
+ define arm_aapcs_vfpcc <4 x float> @vpr_or_vccr_write_between_vcmps(<4 x float> %inactive1) #0 {
+ entry:
+ ret <4 x float> %inactive1
+ }
+
+ attributes #0 = { "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" }
+...
+---
+name: vcmp_with_opposite_cond
+alignment: 4
+body: |
+ ; CHECK-LABEL: name: vcmp_with_opposite_cond
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf16_]], 0, $noreg
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: [[MVE_VCMPf32_:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT1:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf32_]], 0, $noreg
+ ; CHECK: bb.2:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: [[MVE_VCMPi16_:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT2:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi16_]], 0, $noreg
+ ; CHECK: bb.3:
+ ; CHECK: successors: %bb.4(0x80000000)
+ ; CHECK: [[MVE_VCMPi32_:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT3:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi32_]], 0, $noreg
+ ; CHECK: bb.4:
+ ; CHECK: successors: %bb.5(0x80000000)
+ ; CHECK: [[MVE_VCMPi8_:%[0-9]+]]:vccr = MVE_VCMPi8 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT4:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi8_]], 0, $noreg
+ ; CHECK: bb.5:
+ ; CHECK: successors: %bb.6(0x80000000)
+ ; CHECK: [[MVE_VCMPs16_:%[0-9]+]]:vccr = MVE_VCMPs16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT5:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs16_]], 0, $noreg
+ ; CHECK: bb.6:
+ ; CHECK: successors: %bb.7(0x80000000)
+ ; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT6:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs32_]], 0, $noreg
+ ; CHECK: bb.7:
+ ; CHECK: successors: %bb.8(0x80000000)
+ ; CHECK: [[MVE_VCMPs8_:%[0-9]+]]:vccr = MVE_VCMPs8 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT7:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs8_]], 0, $noreg
+ ; CHECK: bb.8:
+ ; CHECK: successors: %bb.9(0x80000000)
+ ; CHECK: [[MVE_VCMPu16_:%[0-9]+]]:vccr = MVE_VCMPu16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT8:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu16_]], 0, $noreg
+ ; CHECK: bb.9:
+ ; CHECK: successors: %bb.10(0x80000000)
+ ; CHECK: [[MVE_VCMPu32_:%[0-9]+]]:vccr = MVE_VCMPu32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT9:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu32_]], 0, $noreg
+ ; CHECK: bb.10:
+ ; CHECK: successors: %bb.11(0x80000000)
+ ; CHECK: [[MVE_VCMPu8_:%[0-9]+]]:vccr = MVE_VCMPu8 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT10:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu8_]], 0, $noreg
+ ; CHECK: bb.11:
+ ; CHECK: successors: %bb.12(0x80000000)
+ ; CHECK: [[MVE_VCMPf16r:%[0-9]+]]:vccr = MVE_VCMPf16r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT11:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf16r]], 0, $noreg
+ ; CHECK: bb.12:
+ ; CHECK: successors: %bb.13(0x80000000)
+ ; CHECK: [[MVE_VCMPf32r:%[0-9]+]]:vccr = MVE_VCMPf32r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT12:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf32r]], 0, $noreg
+ ; CHECK: bb.13:
+ ; CHECK: successors: %bb.14(0x80000000)
+ ; CHECK: [[MVE_VCMPi16r:%[0-9]+]]:vccr = MVE_VCMPi16r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT13:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi16r]], 0, $noreg
+ ; CHECK: bb.14:
+ ; CHECK: successors: %bb.15(0x80000000)
+ ; CHECK: [[MVE_VCMPi32r:%[0-9]+]]:vccr = MVE_VCMPi32r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT14:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi32r]], 0, $noreg
+ ; CHECK: bb.15:
+ ; CHECK: successors: %bb.16(0x80000000)
+ ; CHECK: [[MVE_VCMPi8r:%[0-9]+]]:vccr = MVE_VCMPi8r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT15:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi8r]], 0, $noreg
+ ; CHECK: bb.16:
+ ; CHECK: successors: %bb.17(0x80000000)
+ ; CHECK: [[MVE_VCMPs16r:%[0-9]+]]:vccr = MVE_VCMPs16r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT16:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs16r]], 0, $noreg
+ ; CHECK: bb.17:
+ ; CHECK: successors: %bb.18(0x80000000)
+ ; CHECK: [[MVE_VCMPs32r:%[0-9]+]]:vccr = MVE_VCMPs32r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT17:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs32r]], 0, $noreg
+ ; CHECK: bb.18:
+ ; CHECK: successors: %bb.19(0x80000000)
+ ; CHECK: [[MVE_VCMPs8r:%[0-9]+]]:vccr = MVE_VCMPs8r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT18:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs8r]], 0, $noreg
+ ; CHECK: bb.19:
+ ; CHECK: successors: %bb.20(0x80000000)
+ ; CHECK: [[MVE_VCMPu16r:%[0-9]+]]:vccr = MVE_VCMPu16r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT19:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu16r]], 0, $noreg
+ ; CHECK: bb.20:
+ ; CHECK: successors: %bb.21(0x80000000)
+ ; CHECK: [[MVE_VCMPu32r:%[0-9]+]]:vccr = MVE_VCMPu32r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT20:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu32r]], 0, $noreg
+ ; CHECK: bb.21:
+ ; CHECK: successors: %bb.22(0x80000000)
+ ; CHECK: [[MVE_VCMPu8r:%[0-9]+]]:vccr = MVE_VCMPu8r %1:mqpr, %25:gprwithzr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT21:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu8r]], 0, $noreg
+ ; CHECK: bb.22:
+ ; CHECK: [[MVE_VCMPu8r1:%[0-9]+]]:vccr = MVE_VCMPu8r %1:mqpr, $zr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT22:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu8r1]], 0, $noreg
+ ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
+ ;
+ ; Tests that VCMPs with an opposite condition are correctly converted into VPNOTs.
+ ;
+ bb.0:
+ %3:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %4:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 11, 0, $noreg
+
+ bb.1:
+ %5:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %6:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 11, 0, $noreg
+
+ bb.2:
+ %7:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %8:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 11, 0, $noreg
+
+ bb.3:
+ %9:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %10:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 11, 0, $noreg
+
+ bb.4:
+ %11:vccr = MVE_VCMPi8 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %12:vccr = MVE_VCMPi8 %0:mqpr, %1:mqpr, 11, 0, $noreg
+
+ bb.5:
+ %13:vccr = MVE_VCMPs16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %14:vccr = MVE_VCMPs16 %0:mqpr, %1:mqpr, 11, 0, $noreg
+
+ bb.6:
+ %15:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %16:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 11, 0, $noreg
+
+ bb.7:
+ %17:vccr = MVE_VCMPs8 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %18:vccr = MVE_VCMPs8 %0:mqpr, %1:mqpr, 11, 0, $noreg
+
+ bb.8:
+ %19:vccr = MVE_VCMPu16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %20:vccr = MVE_VCMPu16 %0:mqpr, %1:mqpr, 11, 0, $noreg
+
+ bb.9:
+ %21:vccr = MVE_VCMPu32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %22:vccr = MVE_VCMPu32 %0:mqpr, %1:mqpr, 11, 0, $noreg
+
+ bb.10:
+ %23:vccr = MVE_VCMPu8 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %24:vccr = MVE_VCMPu8 %0:mqpr, %1:mqpr, 11, 0, $noreg
+
+ bb.11:
+ %25:vccr = MVE_VCMPf16r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
+ %26:vccr = MVE_VCMPf16r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
+
+ bb.12:
+ %27:vccr = MVE_VCMPf32r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
+ %28:vccr = MVE_VCMPf32r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
+
+ bb.13:
+ %29:vccr = MVE_VCMPi16r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
+ %30:vccr = MVE_VCMPi16r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
+
+ bb.14:
+ %31:vccr = MVE_VCMPi32r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
+ %32:vccr = MVE_VCMPi32r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
+
+ bb.15:
+ %33:vccr = MVE_VCMPi8r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
+ %34:vccr = MVE_VCMPi8r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
+
+ bb.16:
+ %35:vccr = MVE_VCMPs16r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
+ %36:vccr = MVE_VCMPs16r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
+
+ bb.17:
+ %37:vccr = MVE_VCMPs32r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
+ %38:vccr = MVE_VCMPs32r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
+
+ bb.18:
+ %39:vccr = MVE_VCMPs8r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
+ %40:vccr = MVE_VCMPs8r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
+
+ bb.19:
+ %41:vccr = MVE_VCMPu16r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
+ %42:vccr = MVE_VCMPu16r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
+
+ bb.20:
+ %43:vccr = MVE_VCMPu32r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
+ %44:vccr = MVE_VCMPu32r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
+
+ bb.21:
+ %45:vccr = MVE_VCMPu8r %0:mqpr, %2:gprwithzr, 10, 0, $noreg
+ %46:vccr = MVE_VCMPu8r %0:mqpr, %2:gprwithzr, 11, 0, $noreg
+
+ bb.22:
+ ; There shouldn't be any exception for $zr, so the second VCMP should
+ ; be transformed into a VPNOT.
+ %47:vccr = MVE_VCMPu8r %0:mqpr, $zr, 10, 0, $noreg
+ %48:vccr = MVE_VCMPu8r %0:mqpr, $zr, 11, 0, $noreg
+
+ tBX_RET 14, $noreg, implicit %0:mqpr
+...
+---
+name: vcmp_with_opposite_cond_and_swapped_operands
+alignment: 4
+body: |
+ ; CHECK-LABEL: name: vcmp_with_opposite_cond_and_swapped_operands
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: [[MVE_VCMPi16_:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi16_]], 0, $noreg
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: [[MVE_VCMPi32_:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT1:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi32_]], 0, $noreg
+ ; CHECK: bb.2:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: [[MVE_VCMPi8_:%[0-9]+]]:vccr = MVE_VCMPi8 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT2:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi8_]], 0, $noreg
+ ; CHECK: bb.3:
+ ; CHECK: successors: %bb.4(0x80000000)
+ ; CHECK: [[MVE_VCMPs16_:%[0-9]+]]:vccr = MVE_VCMPs16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT3:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs16_]], 0, $noreg
+ ; CHECK: bb.4:
+ ; CHECK: successors: %bb.5(0x80000000)
+ ; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT4:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs32_]], 0, $noreg
+ ; CHECK: bb.5:
+ ; CHECK: successors: %bb.6(0x80000000)
+ ; CHECK: [[MVE_VCMPs8_:%[0-9]+]]:vccr = MVE_VCMPs8 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT5:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs8_]], 0, $noreg
+ ; CHECK: bb.6:
+ ; CHECK: successors: %bb.7(0x80000000)
+ ; CHECK: [[MVE_VCMPu16_:%[0-9]+]]:vccr = MVE_VCMPu16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT6:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu16_]], 0, $noreg
+ ; CHECK: bb.7:
+ ; CHECK: successors: %bb.8(0x80000000)
+ ; CHECK: [[MVE_VCMPu32_:%[0-9]+]]:vccr = MVE_VCMPu32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT7:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu32_]], 0, $noreg
+ ; CHECK: bb.8:
+ ; CHECK: [[MVE_VCMPu8_:%[0-9]+]]:vccr = MVE_VCMPu8 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT8:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu8_]], 0, $noreg
+ ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
+ ;
+ ; Tests that VCMPs with an opposite condition and swapped operands are
+ ; correctly converted into VPNOTs.
+ ;
+ bb.0:
+ %2:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %3:vccr = MVE_VCMPi16 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+ bb.1:
+ %4:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %5:vccr = MVE_VCMPi32 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+ bb.2:
+ %6:vccr = MVE_VCMPi8 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %7:vccr = MVE_VCMPi8 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+ bb.3:
+ %8:vccr = MVE_VCMPs16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %9:vccr = MVE_VCMPs16 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+ bb.4:
+ %10:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %11:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+ bb.5:
+ %12:vccr = MVE_VCMPs8 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %13:vccr = MVE_VCMPs8 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+ bb.6:
+ %14:vccr = MVE_VCMPu16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %15:vccr = MVE_VCMPu16 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+ bb.7:
+ %16:vccr = MVE_VCMPu32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %17:vccr = MVE_VCMPu32 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+ bb.8:
+ %18:vccr = MVE_VCMPu8 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %19:vccr = MVE_VCMPu8 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+ tBX_RET 14, $noreg, implicit %0:mqpr
+...
+---
+name: triple_vcmp
+alignment: 4
+body: |
+ ;
+ ; Tests that, when there are 2 "VPNOT-like VCMPs" in a row, only the first
+ ; becomes a VPNOT.
+ ;
+ bb.0:
+ ; CHECK-LABEL: name: triple_vcmp
+ ; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs32_]], 0, $noreg
+ ; CHECK: [[MVE_VCMPs32_1:%[0-9]+]]:vccr = MVE_VCMPs32 %2:mqpr, %1:mqpr, 12, 0, $noreg
+ ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
+ %2:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %3:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 12, 0, $noreg
+ %4:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 12, 0, $noreg
+ tBX_RET 14, $noreg, implicit %0:mqpr
+...
+---
+name: killed_vccr_values
+alignment: 4
+body: |
+ bb.0:
+ ;
+ ; Tests that, if the result of the VCMP is killed before the
+ ; second VCMP (that will be converted into a VPNOT) is found,
+ ; the kill flag is removed.
+ ;
+ ; CHECK-LABEL: name: killed_vccr_values
+ ; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VORR:%[0-9]+]]:mqpr = MVE_VORR %1:mqpr, %2:mqpr, 1, [[MVE_VCMPf16_]], undef [[MVE_VORR]]
+ ; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf16_]], 0, $noreg
+ ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
+ %2:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %3:mqpr = MVE_VORR %0:mqpr, %1:mqpr, 1, killed %2:vccr, undef %3:mqpr
+ %4:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 11, 0, $noreg
+ tBX_RET 14, $noreg, implicit %0:mqpr
+...
+---
+name: predicated_vcmps
+alignment: 4
+body: |
+ ; CHECK-LABEL: name: predicated_vcmps
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: [[MVE_VCMPi16_:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VCMPi16_1:%[0-9]+]]:vccr = MVE_VCMPi16 %2:mqpr, %1:mqpr, 12, 1, [[MVE_VCMPi16_]]
+ ; CHECK: [[MVE_VCMPi16_2:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPi16_]]
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: [[MVE_VCMPi32_:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VCMPi32_1:%[0-9]+]]:vccr = MVE_VCMPi32 %2:mqpr, %1:mqpr, 12, 1, [[MVE_VCMPi32_]]
+ ; CHECK: [[MVE_VCMPi32_2:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPi32_]]
+ ; CHECK: bb.2:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VCMPf16_1:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 11, 1, [[MVE_VCMPf16_]]
+ ; CHECK: [[MVE_VCMPf16_2:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPf16_]]
+ ; CHECK: bb.3:
+ ; CHECK: successors: %bb.4(0x80000000)
+ ; CHECK: [[MVE_VCMPf32_:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VCMPf32_1:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 11, 1, [[MVE_VCMPf32_]]
+ ; CHECK: [[MVE_VCMPf32_2:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPf32_]]
+ ; CHECK: bb.4:
+ ; CHECK: successors: %bb.5(0x80000000)
+ ; CHECK: [[MVE_VCMPi16_3:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VCMPi16_4:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 11, 1, [[MVE_VCMPi16_3]]
+ ; CHECK: [[MVE_VCMPi16_5:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPi16_3]]
+ ; CHECK: bb.5:
+ ; CHECK: [[MVE_VCMPi32_3:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VCMPi32_4:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 11, 1, [[MVE_VCMPi32_3]]
+ ; CHECK: [[MVE_VCMPi32_5:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPi32_3]]
+ ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
+ ;
+ ; Tests that predicated VCMPs are not replaced.
+ ;
+ bb.0:
+ %2:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %3:vccr = MVE_VCMPi16 %1:mqpr, %0:mqpr, 12, 1, %2:vccr
+ %4:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 1, %2:vccr
+
+ bb.1:
+ %5:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %6:vccr = MVE_VCMPi32 %1:mqpr, %0:mqpr, 12, 1, %5:vccr
+ %7:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 1, %5:vccr
+
+ bb.2:
+ %8:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %9:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 11, 1, %8:vccr
+ %10:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 1, %8:vccr
+
+ bb.3:
+ %11:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %12:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 11, 1, %11:vccr
+ %13:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 1, %11:vccr
+
+ bb.4:
+ %14:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %15:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 11, 1, %14:vccr
+ %16:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 1, %14:vccr
+
+ bb.5:
+ %17:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %18:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 11, 1, %17:vccr
+ %19:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 1, %17:vccr
+
+ tBX_RET 14, $noreg, implicit %0:mqpr
+...
+---
+name: flt_with_swapped_operands
+alignment: 4
+body: |
+ ; CHECK-LABEL: name: flt_with_swapped_operands
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VCMPf16_1:%[0-9]+]]:vccr = MVE_VCMPf16 %2:mqpr, %1:mqpr, 12, 0, $noreg
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: [[MVE_VCMPf32_:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VCMPf32_1:%[0-9]+]]:vccr = MVE_VCMPf32 %2:mqpr, %1:mqpr, 12, 0, $noreg
+ ; CHECK: bb.2:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: [[MVE_VCMPf16_2:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VCMPf16_3:%[0-9]+]]:vccr = MVE_VCMPf16 %2:mqpr, %1:mqpr, 11, 0, $noreg
+ ; CHECK: bb.3:
+ ; CHECK: [[MVE_VCMPf32_2:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VCMPf32_3:%[0-9]+]]:vccr = MVE_VCMPf32 %2:mqpr, %1:mqpr, 11, 0, $noreg
+ ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
+ ;
+ ; Tests that float VCMPs with an opposite condition and swapped operands
+ ; are not transformed into VPNOTs.
+ ;
+ bb.0:
+ %2:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %3:vccr = MVE_VCMPf16 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+ bb.1:
+ %4:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %5:vccr = MVE_VCMPf32 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+ bb.2:
+ %6:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %7:vccr = MVE_VCMPf16 %1:mqpr, %0:mqpr, 11, 0, $noreg
+
+ bb.3:
+ %8:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %9:vccr = MVE_VCMPf32 %1:mqpr, %0:mqpr, 11, 0, $noreg
+ tBX_RET 14, $noreg, implicit %0:mqpr
+...
+---
+name: different_opcodes
+alignment: 4
+body: |
+ ;
+ ; Tests that a "VPNOT-like VCMP" with an opcode
diff erent from the previous VCMP
+ ; is not transformed into a VPNOT.
+ ;
+ bb.0:
+  ; CHECK-LABEL: name: different_opcodes
+ ; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 0, 0, $noreg
+ ; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 1, 1, $noreg
+ ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
+ %2:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 0, 0, $noreg
+ %3:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 1, 1, $noreg
+ tBX_RET 14, $noreg, implicit %0:mqpr
+...
+---
+name: incorrect_condcode
+alignment: 4
+body: |
+ ; CHECK-LABEL: name: incorrect_condcode
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VCMPs32_1:%[0-9]+]]:vccr = MVE_VCMPs32 %2:mqpr, %1:mqpr, 11, 0, $noreg
+ ; CHECK: bb.1:
+ ; CHECK: [[MVE_VCMPs32_2:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+ ; CHECK: [[MVE_VCMPs32_3:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 12, 0, $noreg
+ ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
+ ;
+ ; Tests that a VCMP is not transformed into a VPNOT if its CondCode is not
+ ; the opposite CondCode.
+ ;
+ bb.0:
+ %2:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %3:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 11, 0, $noreg
+ bb.1:
+ %4:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+ %5:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 12, 0, $noreg
+ tBX_RET 14, $noreg, implicit %0:mqpr
+...
+---
+name: vpr_or_vccr_write_between_vcmps
+alignment: 4
+body: |
+ ;
+ ; Tests that a "VPNOT-like VCMP" will not be transformed into a VPNOT if
+ ; VCCR/VPR is written to in-between.
+ ;
+ bb.0:
+ ; CHECK-LABEL: name: vpr_or_vccr_write_between_vcmps
+ ; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 12, 0, $noreg
+ ; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT killed [[MVE_VCMPs32_]], 0, $noreg
+ ; CHECK: [[MVE_VCMPs32_1:%[0-9]+]]:vccr = MVE_VCMPs32 %2:mqpr, %1:mqpr, 10, 0, $noreg
+ ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
+ %2:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 12, 0, $noreg
+ %3:vccr = MVE_VPNOT killed %2:vccr, 0, $noreg
+ %4:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 10, 0, $noreg
+ tBX_RET 14, $noreg, implicit %0:mqpr
+...