[llvm] 2823340 - [CodeGen] [ARM] Make RISC-V Init Undef Pass Target Independent and add support for the ARM Architecture. (#77770)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 26 04:12:36 PST 2024
Author: Jack Styles
Date: 2024-02-26T12:12:31Z
New Revision: 28233408a2c8670d7d94ae1bf18a2bb5f7194c32
URL: https://github.com/llvm/llvm-project/commit/28233408a2c8670d7d94ae1bf18a2bb5f7194c32
DIFF: https://github.com/llvm/llvm-project/commit/28233408a2c8670d7d94ae1bf18a2bb5f7194c32.diff
LOG: [CodeGen] [ARM] Make RISC-V Init Undef Pass Target Independent and add support for the ARM Architecture. (#77770)
When using Greedy Register Allocation, there are times where
early-clobber values are ignored, and assigned the same register. This
is illegal behaviour for these instructions. To get around this, using
Pseudo instructions for early-clobber registers gives them a definition
and allows Greedy to assign them to a different register. This then
meets the ARM Architecture Reference Manual and matches the defined
behaviour.
This patch takes the existing RISC-V patch and makes it target
independent, then adds support for the ARM Architecture. Doing this will
ensure early-clobber constraints are followed when using the ARM
Architecture. Making the pass target independent will also open up the
possibility that support for other architectures can be added in the future.
Added:
llvm/lib/CodeGen/InitUndef.cpp
Modified:
llvm/include/llvm/CodeGen/Passes.h
llvm/include/llvm/CodeGen/TargetInstrInfo.h
llvm/include/llvm/CodeGen/TargetRegisterInfo.h
llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
llvm/include/llvm/InitializePasses.h
llvm/include/llvm/Passes/CodeGenPassBuilder.h
llvm/include/llvm/Passes/MachinePassRegistry.def
llvm/lib/CodeGen/CMakeLists.txt
llvm/lib/CodeGen/CodeGen.cpp
llvm/lib/CodeGen/TargetPassConfig.cpp
llvm/lib/Target/ARM/ARMAsmPrinter.cpp
llvm/lib/Target/ARM/ARMBaseInstrInfo.h
llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
llvm/lib/Target/ARM/ARMInstrInfo.td
llvm/lib/Target/ARM/ARMSubtarget.h
llvm/lib/Target/RISCV/CMakeLists.txt
llvm/lib/Target/RISCV/RISCV.h
llvm/lib/Target/RISCV/RISCVInstrInfo.h
llvm/lib/Target/RISCV/RISCVRegisterInfo.h
llvm/lib/Target/RISCV/RISCVSubtarget.h
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
llvm/test/CodeGen/AArch64/O3-pipeline.ll
llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
llvm/test/CodeGen/ARM/O3-pipeline.ll
llvm/test/CodeGen/LoongArch/opt-pipeline.ll
llvm/test/CodeGen/PowerPC/O3-pipeline.ll
llvm/test/CodeGen/RISCV/O0-pipeline.ll
llvm/test/CodeGen/RISCV/O3-pipeline.ll
llvm/test/CodeGen/RISCV/rvv/handle-noreg-with-implicit-def.mir
llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir
llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir
llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll
llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll
llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll
llvm/test/CodeGen/X86/opt-pipeline.ll
llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn
Removed:
llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp
################################################################################
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 3f0d81fa1d14ba..f850767270a4fd 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -196,6 +196,11 @@ namespace llvm {
/// This pass reads flow sensitive profile.
extern char &MIRProfileLoaderPassID;
+ // This pass gives undef values a Pseudo Instruction definition for
+ // Instructions to ensure early-clobber is followed when using the greedy
+ // register allocator.
+ extern char &InitUndefID;
+
/// FastRegisterAllocation Pass - This pass register allocates as fast as
/// possible. It is best suited for debug code where live ranges are short.
///
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 138c65785430f0..e7787aafb98e2d 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2223,6 +2223,15 @@ class TargetInstrInfo : public MCInstrInfo {
llvm_unreachable("unknown number of operands necessary");
}
+ /// Gets the opcode for the Pseudo Instruction used to initialize
+ /// the undef value. If no Instruction is available, this will
+ /// fail compilation.
+ virtual unsigned getUndefInitOpcode(unsigned RegClassID) const {
+ (void)RegClassID;
+
+ llvm_unreachable("Unexpected register class.");
+ }
+
private:
mutable std::unique_ptr<MIRFormatter> Formatter;
unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 5098fc68df3b20..e7c9ecd2e1851a 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -1172,6 +1172,28 @@ class TargetRegisterInfo : public MCRegisterInfo {
virtual bool isNonallocatableRegisterCalleeSave(MCRegister Reg) const {
return false;
}
+
+ /// Returns the Largest Super Class that is being initialized. There
+ /// should be a Pseudo Instruction implemented for the super class
+ /// that is being returned to ensure that Init Undef can apply the
+ /// initialization correctly.
+ virtual const TargetRegisterClass *
+ getLargestSuperClass(const TargetRegisterClass *RC) const {
+ llvm_unreachable("Unexpected target register class.");
+ }
+
+ /// Returns if the architecture being targeted has the required Pseudo
+ /// Instructions for initializing the register. By default this returns false,
+ /// but where it is overridden for an architecture, the behaviour will be
+ /// different. This can either be a check to ensure the Register Class is
+ /// present, or to return true as an indication the architecture supports the
+ /// pass. If using the method that does not check for the Register Class, it
+ /// is imperative to ensure all required Pseudo Instructions are implemented,
+ /// otherwise compilation may fail with an `Unexpected register class` error.
+ virtual bool
+ doesRegClassHavePseudoInitUndef(const TargetRegisterClass *RC) const {
+ return false;
+ }
};
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
index a064dec7d8ab38..7f8ed5c5019890 100644
--- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -327,6 +327,12 @@ class TargetSubtargetInfo : public MCSubtargetInfo {
/// Get the list of MacroFusion predicates.
virtual std::vector<MacroFusionPredTy> getMacroFusions() const { return {}; };
+
+ /// supportsInitUndef is used to determine if an architecture supports
+ /// the Init Undef Pass. By default, it is assumed that it will not support
+ /// the pass, with architecture specific overrides providing the information
+ /// where they are implemented.
+ virtual bool supportsInitUndef() const { return false; }
};
} // end namespace llvm
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index ee91c3ec3ddc2f..e4bf3868d00069 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -300,6 +300,7 @@ void initializeTLSVariableHoistLegacyPassPass(PassRegistry &);
void initializeTwoAddressInstructionPassPass(PassRegistry&);
void initializeTypeBasedAAWrapperPassPass(PassRegistry&);
void initializeTypePromotionLegacyPass(PassRegistry&);
+void initializeInitUndefPass(PassRegistry &);
void initializeUniformityInfoWrapperPassPass(PassRegistry &);
void initializeUnifyLoopExitsLegacyPassPass(PassRegistry &);
void initializeUnpackMachineBundlesPass(PassRegistry&);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index dc60727729f739..82a17e882b3c47 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -1066,6 +1066,8 @@ void CodeGenPassBuilder<Derived>::addOptimizedRegAlloc(
AddMachinePass &addPass) const {
addPass(DetectDeadLanesPass());
+ addPass(InitUndefPass());
+
addPass(ProcessImplicitDefsPass());
// Edge splitting is smarter with machine loop info.
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index d8972080beeb0d..016602730e0e97 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -173,6 +173,7 @@ DUMMY_MACHINE_FUNCTION_PASS("fs-profile-loader", MIRProfileLoaderNewPass)
DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass)
DUMMY_MACHINE_FUNCTION_PASS("gc-empty-basic-blocks", GCEmptyBasicBlocksPass)
DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass)
+DUMMY_MACHINE_FUNCTION_PASS("init-undef-pass", InitUndefPass)
DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass)
DUMMY_MACHINE_FUNCTION_PASS("irtranslator", IRTranslatorPass)
DUMMY_MACHINE_FUNCTION_PASS("kcfi", MachineKCFIPass)
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index d49bcf8a0c8ee2..e02c1d6417e077 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -75,6 +75,7 @@ add_llvm_component_library(LLVMCodeGen
IfConversion.cpp
ImplicitNullChecks.cpp
IndirectBrExpandPass.cpp
+ InitUndef.cpp
InlineSpiller.cpp
InterferenceCache.cpp
InterleavedAccessPass.cpp
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index be1813451228d6..544f1b7f593531 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -54,6 +54,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeIfConverterPass(Registry);
initializeImplicitNullChecksPass(Registry);
initializeIndirectBrExpandLegacyPassPass(Registry);
+ initializeInitUndefPass(Registry);
initializeInterleavedLoadCombinePass(Registry);
initializeInterleavedAccessPass(Registry);
initializeJMCInstrumenterPass(Registry);
diff --git a/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp b/llvm/lib/CodeGen/InitUndef.cpp
similarity index 61%
rename from llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp
rename to llvm/lib/CodeGen/InitUndef.cpp
index 735fc1350c0091..96ac385b6abf82 100644
--- a/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp
+++ b/llvm/lib/CodeGen/InitUndef.cpp
@@ -1,4 +1,4 @@
-//===- RISCVRVVInitUndef.cpp - Initialize undef vector value to pseudo ----===//
+//===- InitUndef.cpp - Initialize undef value to pseudo ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,23 +6,22 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements a function pass that initializes undef vector value to
-// temporary pseudo instruction and remove it in expandpseudo pass to prevent
-// register allocation resulting in a constraint violated result for vector
-// instruction. It also rewrites the NoReg tied operand back to an
-// IMPLICIT_DEF.
+// This file implements a function pass that initializes undef value to
+// temporary pseudo instruction to prevent register allocation resulting in a
+// constraint violated result for the particular instruction. It also rewrites
+// the NoReg tied operand back to an IMPLICIT_DEF.
//
-// RISC-V vector instruction has register overlapping constraint for certain
-// instructions, and will cause illegal instruction trap if violated, we use
-// early clobber to model this constraint, but it can't prevent register
-// allocator allocated same or overlapped if the input register is undef value,
-// so convert IMPLICIT_DEF to temporary pseudo instruction and remove it later
-// could prevent that happen, it's not best way to resolve this, and it might
+// Certain instructions have register overlapping constraints, and
+// will cause illegal instruction trap if violated, we use early clobber to
+// model this constraint, but it can't prevent register allocator allocating
+// same or overlapped if the input register is undef value, so convert
+// IMPLICIT_DEF to temporary pseudo instruction and remove it later could
+// prevent that happen, it's not best way to resolve this, and it might
// change the order of program or increase the register pressure, so ideally we
// should model the constraint right, but before we model the constraint right,
// it's the only way to prevent that happen.
//
-// When we enable the subregister liveness option, it will also trigger same
+// When we enable the subregister liveness option, it will also trigger the same
// issue due to the partial of register is undef. If we pseudoinit the whole
// register, then it will generate redundant COPY instruction. Currently, it
// will generate INSERT_SUBREG to make sure the whole register is occupied
@@ -31,7 +30,7 @@
//
// See also: https://github.com/llvm/llvm-project/issues/50157
//
-// Additionally, this pass rewrites tied operands of vector instructions
+// Additionally, this pass rewrites tied operands of instructions
// from NoReg to IMPLICIT_DEF. (Not that this is a non-overlapping set of
// operands to the above.) We use NoReg to side step a MachineCSE
// optimization quality problem but need to convert back before
@@ -39,23 +38,31 @@
//
//===----------------------------------------------------------------------===//
-#include "RISCV.h"
-#include "RISCVSubtarget.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/DetectDeadLanes.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCRegister.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+
using namespace llvm;
-#define DEBUG_TYPE "riscv-init-undef"
-#define RISCV_INIT_UNDEF_NAME "RISC-V init undef pass"
+#define DEBUG_TYPE "init-undef"
+#define INIT_UNDEF_NAME "Init Undef Pass"
namespace {
-class RISCVInitUndef : public MachineFunctionPass {
+class InitUndef : public MachineFunctionPass {
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
- const RISCVSubtarget *ST;
+ const TargetSubtargetInfo *ST;
const TargetRegisterInfo *TRI;
// Newly added vregs, assumed to be fully rewritten
@@ -65,7 +72,7 @@ class RISCVInitUndef : public MachineFunctionPass {
public:
static char ID;
- RISCVInitUndef() : MachineFunctionPass(ID) {}
+ InitUndef() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -73,14 +80,11 @@ class RISCVInitUndef : public MachineFunctionPass {
MachineFunctionPass::getAnalysisUsage(AU);
}
- StringRef getPassName() const override { return RISCV_INIT_UNDEF_NAME; }
+ StringRef getPassName() const override { return INIT_UNDEF_NAME; }
private:
bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
const DeadLaneDetector &DLD);
- bool isVectorRegClass(const Register R);
- const TargetRegisterClass *
- getVRLargestSuperClass(const TargetRegisterClass *RC) const;
bool handleSubReg(MachineFunction &MF, MachineInstr &MI,
const DeadLaneDetector &DLD);
bool fixupIllOperand(MachineInstr *MI, MachineOperand &MO);
@@ -89,45 +93,9 @@ class RISCVInitUndef : public MachineFunctionPass {
} // end anonymous namespace
-char RISCVInitUndef::ID = 0;
-INITIALIZE_PASS(RISCVInitUndef, DEBUG_TYPE, RISCV_INIT_UNDEF_NAME, false, false)
-char &llvm::RISCVInitUndefID = RISCVInitUndef::ID;
-
-const TargetRegisterClass *
-RISCVInitUndef::getVRLargestSuperClass(const TargetRegisterClass *RC) const {
- if (RISCV::VRM8RegClass.hasSubClassEq(RC))
- return &RISCV::VRM8RegClass;
- if (RISCV::VRM4RegClass.hasSubClassEq(RC))
- return &RISCV::VRM4RegClass;
- if (RISCV::VRM2RegClass.hasSubClassEq(RC))
- return &RISCV::VRM2RegClass;
- if (RISCV::VRRegClass.hasSubClassEq(RC))
- return &RISCV::VRRegClass;
- return RC;
-}
-
-bool RISCVInitUndef::isVectorRegClass(const Register R) {
- const TargetRegisterClass *RC = MRI->getRegClass(R);
- return RISCV::VRRegClass.hasSubClassEq(RC) ||
- RISCV::VRM2RegClass.hasSubClassEq(RC) ||
- RISCV::VRM4RegClass.hasSubClassEq(RC) ||
- RISCV::VRM8RegClass.hasSubClassEq(RC);
-}
-
-static unsigned getUndefInitOpcode(unsigned RegClassID) {
- switch (RegClassID) {
- case RISCV::VRRegClassID:
- return RISCV::PseudoRVVInitUndefM1;
- case RISCV::VRM2RegClassID:
- return RISCV::PseudoRVVInitUndefM2;
- case RISCV::VRM4RegClassID:
- return RISCV::PseudoRVVInitUndefM4;
- case RISCV::VRM8RegClassID:
- return RISCV::PseudoRVVInitUndefM8;
- default:
- llvm_unreachable("Unexpected register class.");
- }
-}
+char InitUndef::ID = 0;
+INITIALIZE_PASS(InitUndef, DEBUG_TYPE, INIT_UNDEF_NAME, false, false)
+char &llvm::InitUndefID = InitUndef::ID;
static bool isEarlyClobberMI(MachineInstr &MI) {
return llvm::any_of(MI.defs(), [](const MachineOperand &DefMO) {
@@ -143,7 +111,7 @@ static bool findImplictDefMIFromReg(Register Reg, MachineRegisterInfo *MRI) {
return false;
}
-bool RISCVInitUndef::handleReg(MachineInstr *MI) {
+bool InitUndef::handleReg(MachineInstr *MI) {
bool Changed = false;
for (auto &UseMO : MI->uses()) {
if (!UseMO.isReg())
@@ -152,7 +120,7 @@ bool RISCVInitUndef::handleReg(MachineInstr *MI) {
continue;
if (!UseMO.getReg().isVirtual())
continue;
- if (!isVectorRegClass(UseMO.getReg()))
+ if (!TRI->doesRegClassHavePseudoInitUndef(MRI->getRegClass(UseMO.getReg())))
continue;
if (UseMO.isUndef() || findImplictDefMIFromReg(UseMO.getReg(), MRI))
@@ -161,8 +129,8 @@ bool RISCVInitUndef::handleReg(MachineInstr *MI) {
return Changed;
}
-bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI,
- const DeadLaneDetector &DLD) {
+bool InitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI,
+ const DeadLaneDetector &DLD) {
bool Changed = false;
for (MachineOperand &UseMO : MI.uses()) {
@@ -172,6 +140,8 @@ bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI,
continue;
if (UseMO.isTied())
continue;
+ if (!TRI->doesRegClassHavePseudoInitUndef(MRI->getRegClass(UseMO.getReg())))
+ continue;
Register Reg = UseMO.getReg();
if (NewRegs.count(Reg))
@@ -183,7 +153,7 @@ bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI,
continue;
const TargetRegisterClass *TargetRegClass =
- getVRLargestSuperClass(MRI->getRegClass(Reg));
+ TRI->getLargestSuperClass(MRI->getRegClass(Reg));
LaneBitmask NeedDef = Info.UsedLanes & ~Info.DefinedLanes;
@@ -202,11 +172,12 @@ bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI,
Register LatestReg = Reg;
for (auto ind : SubRegIndexNeedInsert) {
Changed = true;
- const TargetRegisterClass *SubRegClass =
- getVRLargestSuperClass(TRI->getSubRegisterClass(TargetRegClass, ind));
+ const TargetRegisterClass *SubRegClass = TRI->getLargestSuperClass(
+ TRI->getSubRegisterClass(TargetRegClass, ind));
Register TmpInitSubReg = MRI->createVirtualRegister(SubRegClass);
+ LLVM_DEBUG(dbgs() << "Register Class ID" << SubRegClass->getID() << "\n");
BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
- TII->get(getUndefInitOpcode(SubRegClass->getID())),
+ TII->get(TII->getUndefInitOpcode(SubRegClass->getID())),
TmpInitSubReg);
Register NewReg = MRI->createVirtualRegister(TargetRegClass);
BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
@@ -223,15 +194,16 @@ bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI,
return Changed;
}
-bool RISCVInitUndef::fixupIllOperand(MachineInstr *MI, MachineOperand &MO) {
+bool InitUndef::fixupIllOperand(MachineInstr *MI, MachineOperand &MO) {
LLVM_DEBUG(
- dbgs() << "Emitting PseudoRVVInitUndef for implicit vector register "
+ dbgs() << "Emitting PseudoInitUndef Instruction for implicit register "
<< MO.getReg() << '\n');
const TargetRegisterClass *TargetRegClass =
- getVRLargestSuperClass(MRI->getRegClass(MO.getReg()));
- unsigned Opcode = getUndefInitOpcode(TargetRegClass->getID());
+ TRI->getLargestSuperClass(MRI->getRegClass(MO.getReg()));
+ LLVM_DEBUG(dbgs() << "Register Class ID" << TargetRegClass->getID() << "\n");
+ unsigned Opcode = TII->getUndefInitOpcode(TargetRegClass->getID());
Register NewReg = MRI->createVirtualRegister(TargetRegClass);
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(Opcode), NewReg);
MO.setReg(NewReg);
@@ -240,9 +212,8 @@ bool RISCVInitUndef::fixupIllOperand(MachineInstr *MI, MachineOperand &MO) {
return true;
}
-bool RISCVInitUndef::processBasicBlock(MachineFunction &MF,
- MachineBasicBlock &MBB,
- const DeadLaneDetector &DLD) {
+bool InitUndef::processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
+ const DeadLaneDetector &DLD) {
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
MachineInstr &MI = *I;
@@ -252,15 +223,15 @@ bool RISCVInitUndef::processBasicBlock(MachineFunction &MF,
unsigned UseOpIdx;
if (MI.getNumDefs() != 0 && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
MachineOperand &UseMO = MI.getOperand(UseOpIdx);
- if (UseMO.getReg() == RISCV::NoRegister) {
+ if (UseMO.getReg() == MCRegister::NoRegister) {
const TargetRegisterClass *RC =
- TII->getRegClass(MI.getDesc(), UseOpIdx, TRI, MF);
+ TII->getRegClass(MI.getDesc(), UseOpIdx, TRI, MF);
Register NewDest = MRI->createVirtualRegister(RC);
// We don't have a way to update dead lanes, so keep track of the
// new register so that we avoid querying it later.
NewRegs.insert(NewDest);
- BuildMI(MBB, I, I->getDebugLoc(),
- TII->get(TargetOpcode::IMPLICIT_DEF), NewDest);
+ BuildMI(MBB, I, I->getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
+ NewDest);
UseMO.setReg(NewDest);
Changed = true;
}
@@ -275,9 +246,16 @@ bool RISCVInitUndef::processBasicBlock(MachineFunction &MF,
return Changed;
}
-bool RISCVInitUndef::runOnMachineFunction(MachineFunction &MF) {
- ST = &MF.getSubtarget<RISCVSubtarget>();
- if (!ST->hasVInstructions())
+bool InitUndef::runOnMachineFunction(MachineFunction &MF) {
+ ST = &MF.getSubtarget();
+
+ // supportsInitUndef is implemented to reflect if an architecture has support
+ // for the InitUndef pass. Support comes from having the relevant Pseudo
+ // instructions that can be used to initialize the register. The function
+ // returns false by default so requires an implementation per architecture.
+ // Support can be added by overriding the function in a way that best fits
+ // the architecture.
+ if (!ST->supportsInitUndef())
return false;
MRI = &MF.getRegInfo();
@@ -297,5 +275,3 @@ bool RISCVInitUndef::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-
-FunctionPass *llvm::createRISCVInitUndefPass() { return new RISCVInitUndef(); }
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 2ed39a5696e205..cf068ece8d4cab 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1427,6 +1427,8 @@ void TargetPassConfig::addFastRegAlloc() {
void TargetPassConfig::addOptimizedRegAlloc() {
addPass(&DetectDeadLanesID);
+ addPass(&InitUndefID);
+
addPass(&ProcessImplicitDefsID);
// LiveVariables currently requires pure SSA form.
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 15cda9b9432d5f..642739a29d6b06 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -2409,6 +2409,12 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
case ARM::SEH_EpilogEnd:
ATS.emitARMWinCFIEpilogEnd();
return;
+
+ case ARM::PseudoARMInitUndefMQPR:
+ case ARM::PseudoARMInitUndefSPR:
+ case ARM::PseudoARMInitUndefDPR_VFP2:
+ case ARM::PseudoARMInitUndefGPR:
+ return;
}
MCInst TmpInst;
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index c3b475e0306eee..30f0730774b78c 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -13,16 +13,21 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
#define LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
+#include "ARMBaseRegisterInfo.h"
#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsARM.h"
+#include "llvm/Support/ErrorHandling.h"
#include <array>
#include <cstdint>
@@ -536,6 +541,19 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {
std::optional<RegImmPair> isAddImmediate(const MachineInstr &MI,
Register Reg) const override;
+
+ unsigned getUndefInitOpcode(unsigned RegClassID) const override {
+ if (RegClassID == ARM::MQPRRegClass.getID())
+ return ARM::PseudoARMInitUndefMQPR;
+ if (RegClassID == ARM::SPRRegClass.getID())
+ return ARM::PseudoARMInitUndefSPR;
+ if (RegClassID == ARM::DPR_VFP2RegClass.getID())
+ return ARM::PseudoARMInitUndefDPR_VFP2;
+ if (RegClassID == ARM::GPRRegClass.getID())
+ return ARM::PseudoARMInitUndefGPR;
+
+ llvm_unreachable("Unexpected register class.");
+ }
};
/// Get the operands corresponding to the given \p Pred value. By default, the
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 926d702b4092a5..53803cff8b90ac 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -240,6 +240,33 @@ class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
unsigned SrcSubReg) const override;
int getSEHRegNum(unsigned i) const { return getEncodingValue(i); }
+
+ const TargetRegisterClass *
+ getLargestSuperClass(const TargetRegisterClass *RC) const override {
+ if (ARM::MQPRRegClass.hasSubClassEq(RC))
+ return &ARM::MQPRRegClass;
+ if (ARM::SPRRegClass.hasSubClassEq(RC))
+ return &ARM::SPRRegClass;
+ if (ARM::DPR_VFP2RegClass.hasSubClassEq(RC))
+ return &ARM::DPR_VFP2RegClass;
+ if (ARM::GPRRegClass.hasSubClassEq(RC))
+ return &ARM::GPRRegClass;
+ return RC;
+ }
+
+ bool doesRegClassHavePseudoInitUndef(
+ const TargetRegisterClass *RC) const override {
+ (void)RC;
+ // For the ARM Architecture we want to always return true because all
+ // required PseudoInitUndef types have been added. If compilation fails due
+ // to `Unexpected register class`, this is likely to be because the specific
+ // register being used is not supported by Init Undef and needs the Pseudo
+ // Instruction adding to ARMInstrInfo.td. If this is implemented as a
+ // conditional check, this could create a false positive where Init Undef is
+ // not running, skipping the instruction and moving to the next. This could
+ // lead to illegal instructions being generated by the register allocator.
+ return true;
+ }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 074dea36b64145..08b519e4d5cbf5 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -6534,3 +6534,15 @@ let isPseudo = 1 in {
let isTerminator = 1 in
def SEH_EpilogEnd : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
}
+
+
+//===----------------------------------------------------------------------===//
+// Pseudo Instructions for use when early-clobber is defined and Greedy Register
+// Allocation is used. This ensures the constraint is used properly.
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
+ def PseudoARMInitUndefMQPR : PseudoInst<(outs MQPR:$vd), (ins), NoItinerary, []>;
+ def PseudoARMInitUndefSPR : PseudoInst<(outs SPR:$sd), (ins), NoItinerary, []>;
+ def PseudoARMInitUndefDPR_VFP2 : PseudoInst<(outs DPR_VFP2:$dd), (ins), NoItinerary, []>;
+ def PseudoARMInitUndefGPR : PseudoInst<(outs GPR:$rd), (ins), NoItinerary, []>;
+}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 91f3978b041a3a..044b1c4c54e0c8 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -278,6 +278,13 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
return &InstrInfo->getRegisterInfo();
}
+ /// The correct instructions have been implemented to initialize undef
+ /// registers, therefore the ARM Architecture is supported by the Init Undef
+ /// Pass. This will return true as the pass needs to be supported for all
+ /// types of instructions. The pass will then perform more checks to ensure it
+ /// should be applying the Pseudo Instructions.
+ bool supportsInitUndef() const override { return true; }
+
const CallLowering *getCallLowering() const override;
InstructionSelector *getInstructionSelector() const override;
const LegalizerInfo *getLegalizerInfo() const override;
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index ac88cd49db4e4b..8715403f3839a6 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -51,7 +51,6 @@ add_llvm_target(RISCVCodeGen
RISCVMoveMerger.cpp
RISCVPushPopOptimizer.cpp
RISCVRegisterInfo.cpp
- RISCVRVVInitUndef.cpp
RISCVSubtarget.cpp
RISCVTargetMachine.cpp
RISCVTargetObjectFile.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index 9eb18099894b21..7af543f018ccbd 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -72,10 +72,6 @@ void initializeRISCVInsertWriteVXRMPass(PassRegistry &);
FunctionPass *createRISCVRedundantCopyEliminationPass();
void initializeRISCVRedundantCopyEliminationPass(PassRegistry &);
-FunctionPass *createRISCVInitUndefPass();
-void initializeRISCVInitUndefPass(PassRegistry &);
-extern char &RISCVInitUndefID;
-
FunctionPass *createRISCVMoveMergePass();
void initializeRISCVMoveMergePass(PassRegistry &);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 0f7d3e4e433908..2d567342599636 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_RISCV_RISCVINSTRINFO_H
#define LLVM_LIB_TARGET_RISCV_RISCVINSTRINFO_H
+#include "RISCV.h"
#include "RISCVRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -20,6 +21,7 @@
#define GET_INSTRINFO_HEADER
#define GET_INSTRINFO_OPERAND_ENUM
#include "RISCVGenInstrInfo.inc"
+#include "RISCVGenRegisterInfo.inc"
namespace llvm {
@@ -262,6 +264,21 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
getSerializableMachineMemOperandTargetFlags() const override;
+ unsigned getUndefInitOpcode(unsigned RegClassID) const override {
+ switch (RegClassID) {
+ case RISCV::VRRegClassID:
+ return RISCV::PseudoRVVInitUndefM1;
+ case RISCV::VRM2RegClassID:
+ return RISCV::PseudoRVVInitUndefM2;
+ case RISCV::VRM4RegClassID:
+ return RISCV::PseudoRVVInitUndefM4;
+ case RISCV::VRM8RegClassID:
+ return RISCV::PseudoRVVInitUndefM8;
+ default:
+ llvm_unreachable("Unexpected register class.");
+ }
+ }
+
protected:
const RISCVSubtarget &STI;
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index 431ea23b3e2d04..e46fe8ecb900fc 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -96,6 +96,27 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
SmallVectorImpl<MCPhysReg> &Hints,
const MachineFunction &MF, const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const override;
+
+ const TargetRegisterClass *
+ getLargestSuperClass(const TargetRegisterClass *RC) const override {
+ if (RISCV::VRM8RegClass.hasSubClassEq(RC))
+ return &RISCV::VRM8RegClass;
+ if (RISCV::VRM4RegClass.hasSubClassEq(RC))
+ return &RISCV::VRM4RegClass;
+ if (RISCV::VRM2RegClass.hasSubClassEq(RC))
+ return &RISCV::VRM2RegClass;
+ if (RISCV::VRRegClass.hasSubClassEq(RC))
+ return &RISCV::VRRegClass;
+ return RC;
+ }
+
+ bool doesRegClassHavePseudoInitUndef(
+ const TargetRegisterClass *RC) const override {
+ return RISCV::VRRegClass.hasSubClassEq(RC) ||
+ RISCV::VRM2RegClass.hasSubClassEq(RC) ||
+ RISCV::VRM4RegClass.hasSubClassEq(RC) ||
+ RISCV::VRM8RegClass.hasSubClassEq(RC);
+ }
};
}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 9ebf278d6749f0..ba108912d93400 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -289,6 +289,8 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
};
unsigned getMinimumJumpTableEntries() const;
+
+ bool supportsInitUndef() const override { return hasVInstructions(); }
};
} // End llvm namespace
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index be2d4c5c17d1e7..6fe0abaccb9d98 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -124,7 +124,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVInsertReadWriteCSRPass(*PR);
initializeRISCVInsertWriteVXRMPass(*PR);
initializeRISCVDAGToDAGISelPass(*PR);
- initializeRISCVInitUndefPass(*PR);
initializeRISCVMoveMergePass(*PR);
initializeRISCVPushPopOptPass(*PR);
}
@@ -383,7 +382,6 @@ class RISCVPassConfig : public TargetPassConfig {
bool addRegAssignAndRewriteOptimized() override;
void addPreRegAlloc() override;
void addPostRegAlloc() override;
- void addOptimizedRegAlloc() override;
void addFastRegAlloc() override;
};
} // namespace
@@ -564,14 +562,8 @@ void RISCVPassConfig::addPreRegAlloc() {
addPass(createRISCVInsertWriteVXRMPass());
}
-void RISCVPassConfig::addOptimizedRegAlloc() {
- insertPass(&DetectDeadLanesID, &RISCVInitUndefID);
-
- TargetPassConfig::addOptimizedRegAlloc();
-}
-
void RISCVPassConfig::addFastRegAlloc() {
- addPass(createRISCVInitUndefPass());
+ addPass(&InitUndefID);
TargetPassConfig::addFastRegAlloc();
}
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 638f26298ee26a..ae0dbed09979b4 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -155,6 +155,7 @@
; CHECK-NEXT: AArch64 MI Peephole Optimization pass
; CHECK-NEXT: AArch64 Dead register definitions
; CHECK-NEXT: Detect Dead Lanes
+; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Process Implicit Definitions
; CHECK-NEXT: Remove unreachable machine basic blocks
; CHECK-NEXT: Live Variable Analysis
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 48f00a82e3e1c6..c67328a025b858 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -321,6 +321,7 @@
; GCN-O1-NEXT: Register Usage Information Propagation
; GCN-O1-NEXT: Detect Dead Lanes
; GCN-O1-NEXT: Remove dead machine instructions
+; GCN-O1-NEXT: Init Undef Pass
; GCN-O1-NEXT: Process Implicit Definitions
; GCN-O1-NEXT: Remove unreachable machine basic blocks
; GCN-O1-NEXT: Live Variable Analysis
@@ -618,6 +619,7 @@
; GCN-O1-OPTS-NEXT: Register Usage Information Propagation
; GCN-O1-OPTS-NEXT: Detect Dead Lanes
; GCN-O1-OPTS-NEXT: Remove dead machine instructions
+; GCN-O1-OPTS-NEXT: Init Undef Pass
; GCN-O1-OPTS-NEXT: Process Implicit Definitions
; GCN-O1-OPTS-NEXT: Remove unreachable machine basic blocks
; GCN-O1-OPTS-NEXT: Live Variable Analysis
@@ -920,6 +922,7 @@
; GCN-O2-NEXT: Register Usage Information Propagation
; GCN-O2-NEXT: Detect Dead Lanes
; GCN-O2-NEXT: Remove dead machine instructions
+; GCN-O2-NEXT: Init Undef Pass
; GCN-O2-NEXT: Process Implicit Definitions
; GCN-O2-NEXT: Remove unreachable machine basic blocks
; GCN-O2-NEXT: Live Variable Analysis
@@ -1235,6 +1238,7 @@
; GCN-O3-NEXT: Register Usage Information Propagation
; GCN-O3-NEXT: Detect Dead Lanes
; GCN-O3-NEXT: Remove dead machine instructions
+; GCN-O3-NEXT: Init Undef Pass
; GCN-O3-NEXT: Process Implicit Definitions
; GCN-O3-NEXT: Remove unreachable machine basic blocks
; GCN-O3-NEXT: Live Variable Analysis
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index 5e565970fc3a86..5914e98549fcc4 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -113,6 +113,7 @@
; CHECK-NEXT: ARM pre- register allocation load / store optimization pass
; CHECK-NEXT: ARM A15 S->D optimizer
; CHECK-NEXT: Detect Dead Lanes
+; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Process Implicit Definitions
; CHECK-NEXT: Remove unreachable machine basic blocks
; CHECK-NEXT: Live Variable Analysis
diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
index 3134d940545e80..a31eb8d11a35a6 100644
--- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
@@ -105,6 +105,7 @@
; CHECK-NEXT: Remove dead machine instructions
; CHECK-NEXT: LoongArch Pre-RA pseudo instruction expansion pass
; CHECK-NEXT: Detect Dead Lanes
+; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Process Implicit Definitions
; CHECK-NEXT: Remove unreachable machine basic blocks
; CHECK-NEXT: Live Variable Analysis
diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
index 6ce4416211cc4d..f94f91b38fecc9 100644
--- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
+++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
@@ -149,6 +149,7 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Modulo Software Pipelining
; CHECK-NEXT: Detect Dead Lanes
+; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Process Implicit Definitions
; CHECK-NEXT: Remove unreachable machine basic blocks
; CHECK-NEXT: Live Variable Analysis
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index e90fa24761bc16..faf37545e1a117 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -43,7 +43,7 @@
; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass
; CHECK-NEXT: RISC-V Insert Write VXRM Pass
-; CHECK-NEXT: RISC-V init undef pass
+; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: Two-Address instruction pass
; CHECK-NEXT: Fast Register Allocator
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 364c1e430b9156..90472f246918f3 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -120,7 +120,7 @@
; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass
; CHECK-NEXT: RISC-V Insert Write VXRM Pass
; CHECK-NEXT: Detect Dead Lanes
-; CHECK-NEXT: RISC-V init undef pass
+; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Process Implicit Definitions
; CHECK-NEXT: Remove unreachable machine basic blocks
; CHECK-NEXT: Live Variable Analysis
diff --git a/llvm/test/CodeGen/RISCV/rvv/handle-noreg-with-implicit-def.mir b/llvm/test/CodeGen/RISCV/rvv/handle-noreg-with-implicit-def.mir
index 4102aa8aa4d723..e090b313d4f7b8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/handle-noreg-with-implicit-def.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/handle-noreg-with-implicit-def.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
-# RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -run-pass=riscv-init-undef -o - %s | FileCheck %s --check-prefix=MIR
+# RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -run-pass=init-undef -o - %s | FileCheck %s --check-prefix=MIR
...
---
name: vrgather_all_undef
diff --git a/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir b/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir
index bf12a4df88d38c..9cafb323dc65c8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc %s -mtriple=riscv64 -mattr=+v -riscv-enable-subreg-liveness -run-pass=riscv-init-undef -o - | FileCheck %s
+# RUN: llc %s -mtriple=riscv64 -mattr=+v -riscv-enable-subreg-liveness -run-pass=init-undef -o - | FileCheck %s
...
---
diff --git a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir
index 58b2687824aa14..dcf61c048ff0e7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=riscv32 -mattr=+v -riscv-enable-subreg-liveness -run-pass riscv-init-undef -run-pass machineverifier %s -o - | FileCheck %s
+# RUN: llc -mtriple=riscv32 -mattr=+v -riscv-enable-subreg-liveness -run-pass init-undef -run-pass machineverifier %s -o - | FileCheck %s
--- |
source_filename = "<stdin>"
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll
index 9bb24fc61ccef3..02234c63725360 100644
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll
@@ -699,6 +699,17 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot270_s32_undef() {
+; CHECK-LABEL: test_vhcaddq_rot270_s32_undef:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vhcadd.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #270
+; CHECK-NOT: vhcadd.s32 q[[REG:[0-9]+]], q{{[0-9]+}}, q[[REG]], #270
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 0, i32 1, <4 x i32> undef, <4 x i32> undef)
+ ret <4 x i32> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot90_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot90_x_s8:
; CHECK: @ %bb.0: @ %entry
diff --git a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll
index c95fe2296e0998..203ce1f8811895 100644
--- a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll
@@ -373,13 +373,13 @@ define arm_aapcs_vfpcc void @mul_i32(ptr %A, ptr %B, i64 %C, ptr %D) {
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: ldr.w lr, [sp, #20]
-; CHECK-NEXT: vmov.f32 s10, s1
; CHECK-NEXT: vmov.f32 s14, s5
+; CHECK-NEXT: vmov.f32 s10, s1
; CHECK-NEXT: vmov r5, s4
; CHECK-NEXT: vmov.f32 s4, s6
; CHECK-NEXT: vmov.f32 s6, s7
-; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: vmov r1, s14
+; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: smull r12, r3, r1, r0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov.f32 s0, s2
diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
index bd672d1ba4f660..6d581afe9fb31e 100644
--- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
@@ -222,88 +222,88 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no
; CHECK-NEXT: vldrw.u32 q1, [r4]
; CHECK-NEXT: .LBB1_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrw.u32 q4, [r5], #16
-; CHECK-NEXT: vldrw.u32 q3, [r0], #16
+; CHECK-NEXT: vldrw.u32 q3, [r5], #16
+; CHECK-NEXT: vldrw.u32 q2, [r0], #16
; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: mov.w r2, #-1
-; CHECK-NEXT: vmov.f32 s8, s14
+; CHECK-NEXT: vmov.f32 s16, s10
; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: vmov.f32 s20, s18
+; CHECK-NEXT: vmov.f32 s20, s14
+; CHECK-NEXT: vmov.f32 s18, s11
+; CHECK-NEXT: vmov.f32 s22, s15
; CHECK-NEXT: mov.w r8, #0
-; CHECK-NEXT: vmov.f32 s10, s15
-; CHECK-NEXT: vmov.f32 s22, s19
-; CHECK-NEXT: vmullb.s32 q6, q5, q2
-; CHECK-NEXT: vmov.f32 s18, s17
+; CHECK-NEXT: vmullb.s32 q6, q5, q4
+; CHECK-NEXT: vmov.f32 s14, s13
; CHECK-NEXT: vmov r4, r7, d12
; CHECK-NEXT: asrl r4, r7, #31
-; CHECK-NEXT: vmov.f32 s14, s13
+; CHECK-NEXT: vmov.f32 s10, s9
; CHECK-NEXT: rsbs.w r5, r4, #-2147483648
; CHECK-NEXT: sbcs.w r5, r2, r7
; CHECK-NEXT: csetm r5, lt
; CHECK-NEXT: bfi r8, r5, #0, #8
; CHECK-NEXT: vmov r10, r5, d13
; CHECK-NEXT: asrl r10, r5, #31
-; CHECK-NEXT: vmov r6, s18
+; CHECK-NEXT: vmov r6, s14
; CHECK-NEXT: rsbs.w r3, r10, #-2147483648
-; CHECK-NEXT: vmov q2[2], q2[0], r4, r10
+; CHECK-NEXT: vmov q4[2], q4[0], r4, r10
; CHECK-NEXT: sbcs.w r3, r2, r5
-; CHECK-NEXT: vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT: vmov q4[3], q4[1], r7, r5
; CHECK-NEXT: csetm r3, lt
; CHECK-NEXT: bfi r8, r3, #8, #8
; CHECK-NEXT: vmsr p0, r8
; CHECK-NEXT: mvn r8, #-2147483648
-; CHECK-NEXT: vpsel q2, q2, q0
-; CHECK-NEXT: vmov r3, r4, d4
+; CHECK-NEXT: vpsel q4, q4, q0
+; CHECK-NEXT: vmov r3, r4, d8
; CHECK-NEXT: subs.w r3, r3, r8
; CHECK-NEXT: sbcs r3, r4, #0
; CHECK-NEXT: mov.w r4, #0
; CHECK-NEXT: csetm r3, lt
; CHECK-NEXT: bfi r4, r3, #0, #8
-; CHECK-NEXT: vmov r3, r5, d5
+; CHECK-NEXT: vmov r3, r5, d9
; CHECK-NEXT: subs.w r3, r3, r8
; CHECK-NEXT: sbcs r3, r5, #0
; CHECK-NEXT: mov.w r5, #0
; CHECK-NEXT: csetm r3, lt
; CHECK-NEXT: bfi r4, r3, #8, #8
-; CHECK-NEXT: vmov r3, s12
+; CHECK-NEXT: vmov r3, s8
; CHECK-NEXT: vmsr p0, r4
-; CHECK-NEXT: vmov r4, s16
-; CHECK-NEXT: vpsel q2, q2, q1
+; CHECK-NEXT: vmov r4, s12
+; CHECK-NEXT: vpsel q4, q4, q1
; CHECK-NEXT: smull r4, r7, r4, r3
; CHECK-NEXT: asrl r4, r7, #31
; CHECK-NEXT: rsbs.w r3, r4, #-2147483648
; CHECK-NEXT: sbcs.w r3, r2, r7
; CHECK-NEXT: csetm r3, lt
; CHECK-NEXT: bfi r5, r3, #0, #8
-; CHECK-NEXT: vmov r3, s14
+; CHECK-NEXT: vmov r3, s10
; CHECK-NEXT: smull r6, r3, r6, r3
; CHECK-NEXT: asrl r6, r3, #31
; CHECK-NEXT: rsbs.w r1, r6, #-2147483648
-; CHECK-NEXT: vmov q3[2], q3[0], r4, r6
+; CHECK-NEXT: vmov q2[2], q2[0], r4, r6
; CHECK-NEXT: sbcs.w r1, r2, r3
-; CHECK-NEXT: vmov q3[3], q3[1], r7, r3
+; CHECK-NEXT: vmov q2[3], q2[1], r7, r3
; CHECK-NEXT: csetm r1, lt
; CHECK-NEXT: bfi r5, r1, #8, #8
; CHECK-NEXT: vmsr p0, r5
; CHECK-NEXT: ldrd r5, r2, [sp, #8] @ 8-byte Folded Reload
-; CHECK-NEXT: vpsel q3, q3, q0
-; CHECK-NEXT: vmov r1, r3, d6
+; CHECK-NEXT: vpsel q2, q2, q0
+; CHECK-NEXT: vmov r1, r3, d4
; CHECK-NEXT: subs.w r1, r1, r8
; CHECK-NEXT: sbcs r1, r3, #0
; CHECK-NEXT: mov.w r3, #0
; CHECK-NEXT: csetm r1, lt
; CHECK-NEXT: bfi r3, r1, #0, #8
-; CHECK-NEXT: vmov r1, r4, d7
+; CHECK-NEXT: vmov r1, r4, d5
; CHECK-NEXT: subs.w r1, r1, r8
; CHECK-NEXT: sbcs r1, r4, #0
; CHECK-NEXT: csetm r1, lt
; CHECK-NEXT: bfi r3, r1, #8, #8
; CHECK-NEXT: vmsr p0, r3
-; CHECK-NEXT: vpsel q3, q3, q1
-; CHECK-NEXT: vmov.f32 s13, s14
-; CHECK-NEXT: vmov.f32 s14, s8
-; CHECK-NEXT: vmov.f32 s15, s10
-; CHECK-NEXT: vstrb.8 q3, [r2], #16
+; CHECK-NEXT: vpsel q2, q2, q1
+; CHECK-NEXT: vmov.f32 s9, s10
+; CHECK-NEXT: vmov.f32 s10, s16
+; CHECK-NEXT: vmov.f32 s11, s18
+; CHECK-NEXT: vstrb.8 q2, [r2], #16
; CHECK-NEXT: le lr, .LBB1_4
; CHECK-NEXT: @ %bb.5: @ %middle.block
; CHECK-NEXT: ldrd r1, r3, [sp] @ 8-byte Folded Reload
@@ -462,14 +462,14 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(ptr nocapture readonly %pSrcA, ptr n
; CHECK-NEXT: vcmp.u32 cs, q1, q4
; CHECK-NEXT: vstr p0, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: vpstt
-; CHECK-NEXT: vldrwt.u32 q5, [r0], #16
-; CHECK-NEXT: vldrwt.u32 q6, [r1], #16
-; CHECK-NEXT: vmov.f32 s16, s22
-; CHECK-NEXT: vmov.f32 s18, s23
-; CHECK-NEXT: vmov.f32 s28, s26
-; CHECK-NEXT: vmov.f32 s30, s27
-; CHECK-NEXT: vmullb.s32 q0, q7, q4
-; CHECK-NEXT: vmov.f32 s22, s25
+; CHECK-NEXT: vldrwt.u32 q4, [r0], #16
+; CHECK-NEXT: vldrwt.u32 q5, [r1], #16
+; CHECK-NEXT: vmov.f32 s24, s18
+; CHECK-NEXT: vmov.f32 s26, s19
+; CHECK-NEXT: vmov.f32 s28, s22
+; CHECK-NEXT: vmov.f32 s30, s23
+; CHECK-NEXT: vmullb.s32 q0, q7, q6
+; CHECK-NEXT: vmov.f32 s18, s21
; CHECK-NEXT: vmov r10, r5, d0
; CHECK-NEXT: asrl r10, r5, #31
; CHECK-NEXT: rsbs.w r7, r10, #-2147483648
@@ -483,7 +483,7 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(ptr nocapture readonly %pSrcA, ptr n
; CHECK-NEXT: sbcs.w r3, r12, r7
; CHECK-NEXT: vmov q0[3], q0[1], r5, r7
; CHECK-NEXT: csetm r3, lt
-; CHECK-NEXT: vmov r7, s22
+; CHECK-NEXT: vmov r7, s18
; CHECK-NEXT: bfi r4, r3, #8, #8
; CHECK-NEXT: vmsr p0, r4
; CHECK-NEXT: vpsel q0, q0, q2
@@ -498,11 +498,11 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(ptr nocapture readonly %pSrcA, ptr n
; CHECK-NEXT: sbcs r3, r5, #0
; CHECK-NEXT: csetm r3, lt
; CHECK-NEXT: bfi r4, r3, #8, #8
-; CHECK-NEXT: vmov r3, s20
+; CHECK-NEXT: vmov r3, s16
; CHECK-NEXT: vmsr p0, r4
-; CHECK-NEXT: vmov r4, s24
-; CHECK-NEXT: vpsel q4, q0, q3
-; CHECK-NEXT: vmov.f32 s2, s21
+; CHECK-NEXT: vmov r4, s20
+; CHECK-NEXT: vpsel q6, q0, q3
+; CHECK-NEXT: vmov.f32 s2, s17
; CHECK-NEXT: smull r10, r5, r4, r3
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: asrl r10, r5, #31
@@ -536,8 +536,8 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(ptr nocapture readonly %pSrcA, ptr n
; CHECK-NEXT: vpsel q0, q0, q3
; CHECK-NEXT: vldr p0, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: vmov.f32 s1, s2
-; CHECK-NEXT: vmov.f32 s2, s16
-; CHECK-NEXT: vmov.f32 s3, s18
+; CHECK-NEXT: vmov.f32 s2, s24
+; CHECK-NEXT: vmov.f32 s3, s26
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q0, [r2], #16
; CHECK-NEXT: le lr, .LBB2_2
@@ -778,18 +778,17 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no
; CHECK-NEXT: .LBB4_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
-; CHECK-NEXT: vldrw.u32 q3, [r1], #16
-; CHECK-NEXT: vmov.f32 s8, s6
-; CHECK-NEXT: vmov.f32 s16, s14
-; CHECK-NEXT: vmov.f32 s10, s7
-; CHECK-NEXT: vmov.f32 s18, s15
-; CHECK-NEXT: vmullb.u32 q5, q4, q2
+; CHECK-NEXT: vldrw.u32 q2, [r1], #16
+; CHECK-NEXT: vmov.f32 s12, s6
+; CHECK-NEXT: vmov.f32 s16, s10
+; CHECK-NEXT: vmov.f32 s14, s7
+; CHECK-NEXT: vmov.f32 s18, s11
+; CHECK-NEXT: vmullb.u32 q5, q4, q3
; CHECK-NEXT: vmov.f32 s6, s5
; CHECK-NEXT: vmov r10, r5, d10
; CHECK-NEXT: lsrl r10, r5, #31
-; CHECK-NEXT: vmov.f32 s14, s13
+; CHECK-NEXT: vmov.f32 s10, s9
; CHECK-NEXT: subs.w r6, r10, #-1
-; CHECK-NEXT: vmullb.u32 q4, q3, q1
; CHECK-NEXT: sbcs r5, r5, #0
; CHECK-NEXT: mov.w r6, #0
; CHECK-NEXT: csetm r5, lo
@@ -797,15 +796,16 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no
; CHECK-NEXT: vmov r4, r5, d11
; CHECK-NEXT: lsrl r4, r5, #31
; CHECK-NEXT: subs.w r7, r4, #-1
-; CHECK-NEXT: vmov q2[2], q2[0], r10, r4
+; CHECK-NEXT: vmov q3[2], q3[0], r10, r4
; CHECK-NEXT: sbcs r5, r5, #0
; CHECK-NEXT: csetm r5, lo
; CHECK-NEXT: bfi r6, r5, #8, #8
+; CHECK-NEXT: vmsr p0, r6
+; CHECK-NEXT: vpsel q3, q3, q0
+; CHECK-NEXT: vmullb.u32 q4, q2, q1
; CHECK-NEXT: vmov r10, r5, d8
; CHECK-NEXT: lsrl r10, r5, #31
-; CHECK-NEXT: vmsr p0, r6
; CHECK-NEXT: subs.w r6, r10, #-1
-; CHECK-NEXT: vpsel q2, q2, q0
; CHECK-NEXT: sbcs r5, r5, #0
; CHECK-NEXT: mov.w r6, #0
; CHECK-NEXT: csetm r5, lo
@@ -820,8 +820,8 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no
; CHECK-NEXT: vmsr p0, r6
; CHECK-NEXT: vpsel q1, q1, q0
; CHECK-NEXT: vmov.f32 s5, s6
-; CHECK-NEXT: vmov.f32 s6, s8
-; CHECK-NEXT: vmov.f32 s7, s10
+; CHECK-NEXT: vmov.f32 s6, s12
+; CHECK-NEXT: vmov.f32 s7, s14
; CHECK-NEXT: vstrb.8 q1, [r2], #16
; CHECK-NEXT: le lr, .LBB4_4
; CHECK-NEXT: @ %bb.5: @ %middle.block
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll b/llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll
index 217caeebe63356..cebc0d9c0e172c 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll
@@ -190,12 +190,17 @@ entry:
define arm_aapcs_vfpcc <4 x i64> @sext32_0213_0ext(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0213_0ext:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
-; CHECK-NEXT: vmullb.s32 q2, q0, q3
+; CHECK-NEXT: vmov.f32 s17, s4
; CHECK-NEXT: vmov.f32 s0, s1
+; CHECK-NEXT: vmullb.s32 q2, q4, q3
; CHECK-NEXT: vmov.f32 s2, s3
; CHECK-NEXT: vmullb.s32 q1, q0, q3
; CHECK-NEXT: vmov q0, q2
+; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: bx lr
entry:
%shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -210,12 +215,17 @@ entry:
define arm_aapcs_vfpcc <4 x i64> @sext32_0ext_0213(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0ext_0213:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
-; CHECK-NEXT: vmullb.s32 q2, q3, q0
+; CHECK-NEXT: vmov.f32 s17, s4
; CHECK-NEXT: vmov.f32 s0, s1
+; CHECK-NEXT: vmullb.s32 q2, q3, q4
; CHECK-NEXT: vmov.f32 s2, s3
; CHECK-NEXT: vmullb.s32 q1, q3, q0
; CHECK-NEXT: vmov q0, q2
+; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: bx lr
entry:
%shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -466,12 +476,17 @@ entry:
define arm_aapcs_vfpcc <4 x i64> @zext32_0213_0ext(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0213_0ext:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
-; CHECK-NEXT: vmullb.u32 q2, q0, q3
+; CHECK-NEXT: vmov.f32 s17, s4
; CHECK-NEXT: vmov.f32 s0, s1
+; CHECK-NEXT: vmullb.u32 q2, q4, q3
; CHECK-NEXT: vmov.f32 s2, s3
; CHECK-NEXT: vmullb.u32 q1, q0, q3
; CHECK-NEXT: vmov q0, q2
+; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: bx lr
entry:
%shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -486,12 +501,17 @@ entry:
define arm_aapcs_vfpcc <4 x i64> @zext32_0ext_0213(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0ext_0213:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
-; CHECK-NEXT: vmullb.u32 q2, q3, q0
+; CHECK-NEXT: vmov.f32 s17, s4
; CHECK-NEXT: vmov.f32 s0, s1
+; CHECK-NEXT: vmullb.u32 q2, q3, q4
; CHECK-NEXT: vmov.f32 s2, s3
; CHECK-NEXT: vmullb.u32 q1, q3, q0
; CHECK-NEXT: vmov q0, q2
+; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: bx lr
entry:
%shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index 6f2bba84a6eccf..43589dc993dabb 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -131,6 +131,7 @@
; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: Tile Register Pre-configure
; CHECK-NEXT: Detect Dead Lanes
+; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Process Implicit Definitions
; CHECK-NEXT: Remove unreachable machine basic blocks
; CHECK-NEXT: Live Variable Analysis
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn
index 43eaa72047e659..12366a35d6f1a2 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn
@@ -132,7 +132,6 @@ static_library("LLVMRISCVCodeGen") {
"RISCVOptWInstrs.cpp",
"RISCVPostRAExpandPseudoInsts.cpp",
"RISCVPushPopOptimizer.cpp",
- "RISCVRVVInitUndef.cpp",
"RISCVRedundantCopyElimination.cpp",
"RISCVRegisterInfo.cpp",
"RISCVSubtarget.cpp",
More information about the llvm-commits
mailing list