[llvm] [ARM] Add pass for handling undef early-clobber values (PR #77770)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 11 05:51:45 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-arm
Author: Jack Styles (Stylie777)
<details>
<summary>Changes</summary>
When using Greedy Register Allocation, there are times where the early-clobber constraint is ignored for undef values, and an output is assigned the same register as one of the inputs. This is illegal behaviour for these instructions. To get around this, Pseudo instructions are used to give those undef values a definition, which allows Greedy to assign them a different register. The generated code then complies with the ARM Architecture Reference Manual and matches the defined behaviour.
This patch takes a similar approach to the RISC-V pass added as part of #<!-- -->3b8c0b3 to fix issue 50157.
---
Full diff: https://github.com/llvm/llvm-project/pull/77770.diff
8 Files Affected:
- (modified) llvm/lib/Target/ARM/ARM.h (+4)
- (modified) llvm/lib/Target/ARM/ARMAsmPrinter.cpp (+3)
- (added) llvm/lib/Target/ARM/ARMInitUndef.cpp (+242)
- (modified) llvm/lib/Target/ARM/ARMInstrInfo.td (+8)
- (modified) llvm/lib/Target/ARM/ARMTargetMachine.cpp (+10)
- (modified) llvm/lib/Target/ARM/CMakeLists.txt (+1)
- (modified) llvm/test/CodeGen/ARM/O3-pipeline.ll (+1)
- (modified) llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll (+11)
``````````diff
diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h
index b96e0182298524..8f38ee370ab5ee 100644
--- a/llvm/lib/Target/ARM/ARM.h
+++ b/llvm/lib/Target/ARM/ARM.h
@@ -57,6 +57,7 @@ FunctionPass *createARMSLSHardeningPass();
FunctionPass *createARMIndirectThunks();
Pass *createMVELaneInterleavingPass();
FunctionPass *createARMFixCortexA57AES1742098Pass();
+FunctionPass *createARMInitUndefPass();
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
@@ -67,6 +68,7 @@ void initializeARMConstantIslandsPass(PassRegistry &);
void initializeARMDAGToDAGISelPass(PassRegistry &);
void initializeARMExpandPseudoPass(PassRegistry &);
void initializeARMFixCortexA57AES1742098Pass(PassRegistry &);
+void initializeARMInitUndefPass(PassRegistry &);
void initializeARMLoadStoreOptPass(PassRegistry &);
void initializeARMLowOverheadLoopsPass(PassRegistry &);
void initializeARMParallelDSPPass(PassRegistry &);
@@ -80,6 +82,8 @@ void initializeMVEVPTBlockPass(PassRegistry &);
void initializeThumb2ITBlockPass(PassRegistry &);
void initializeThumb2SizeReducePass(PassRegistry &);
+extern char &ARMInitUndefPass;
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_ARM_ARM_H
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 15cda9b9432d5f..8e39e51a28e3e3 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -2409,6 +2409,9 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
case ARM::SEH_EpilogEnd:
ATS.emitARMWinCFIEpilogEnd();
return;
+
+ case ARM::PseudoARMInitUndef:
+ return;
}
MCInst TmpInst;
diff --git a/llvm/lib/Target/ARM/ARMInitUndef.cpp b/llvm/lib/Target/ARM/ARMInitUndef.cpp
new file mode 100644
index 00000000000000..815abcc2f43660
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMInitUndef.cpp
@@ -0,0 +1,242 @@
+//===- ARMInitUndef.cpp - Initialize undef vector value to pseudo -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass runs to allow for Undef values to be given a definition in
+// early-clobber instructions. This can occur when using higher levels of
+// optimisations. Before, undef values were ignored and it would result in
+// early-clobber instructions using the same registers for the output as one of
+// the inputs. This is an illegal operation according to the ARM Architecture
+// Reference Manual. This pass will check for early-clobber instructions and
+// give any undef values a defined Pseudo value to ensure that the early-clobber
+// rules are followed.
+//
+// Example: Without this pass, for the vhcadd instruction the following would
+// be generated: `vhcadd.s32 q0, q0, q0, #270`. This is an illegal instruction
+// as the output register and 2nd input register cannot match. By using this
+// pass, and using the Pseudo instruction, the following will be generated.
+// `vhcadd.s32 q0, q1, q2, #270`. This is allowed, as the output register and
+// 2nd input register are different.
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/ADT/bit.h"
+#include "llvm/CodeGen/DetectDeadLanes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/Debug.h"
+#include <cstddef>
+#include <optional>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-init-undef"
+#define ARM_INIT_UNDEF_NAME "ARM init undef pass"
+
+namespace {
+class ARMInitUndef : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ const ARMSubtarget *ST;
+ const TargetRegisterInfo *TRI;
+
+ SmallSet<Register, 8> NewRegs;
+
+public:
+ static char ID;
+
+ ARMInitUndef() : MachineFunctionPass(ID) {
+ initializeARMInitUndefPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return ARM_INIT_UNDEF_NAME; }
+
+private:
+ bool handleImplicitDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &Inst);
+ bool isVectorRegClass(const Register R);
+ const TargetRegisterClass *
+ getVRLargestSuperClass(const TargetRegisterClass *RC) const;
+ bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
+ const DeadLaneDetector &DLD);
+};
+} // end anonymous namespace
+
+char ARMInitUndef::ID = 0;
+INITIALIZE_PASS(ARMInitUndef, DEBUG_TYPE, ARM_INIT_UNDEF_NAME, false, false)
+char &llvm::ARMInitUndefPass = ARMInitUndef::ID;
+
+static bool isEarlyClobberMI(MachineInstr &MI) {
+ return llvm::any_of(MI.defs(), [](const MachineOperand &DefMO) {
+ return DefMO.isReg() && DefMO.isEarlyClobber();
+ });
+}
+
+static unsigned getUndefInitOpcode(unsigned RegClassID) {
+ if (RegClassID == ARM::MQPRRegClass.getID())
+ return ARM::PseudoARMInitUndef;
+
+ llvm_unreachable("Unexpected register class.");
+}
+
+/* handleImplicitDef gives a definition to undef values. This is only done for
+ * operands of early-clobber instructions that are uses and are not tied to
+ * another operand. Starting at an IMPLICIT_DEF, it collects every operand of
+ * the defined register that is an untied use in an early-clobber instruction.
+ * If any are found, a PseudoARMInitUndef is emitted and those operands are
+ * rewritten to read its result, with their undef flag cleared.
+ */
+bool ARMInitUndef::handleImplicitDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &Inst) {
+ bool Changed = false;
+ Register LastReg;
+
+ while (Inst->getOpcode() == TargetOpcode::IMPLICIT_DEF &&
+ Inst->getOperand(0).getReg() != LastReg) {
+
+ bool HasOtherUse = false;
+ SmallVector<MachineOperand *, 1> UseMOs;
+ LastReg = Inst->getOperand(0).getReg();
+
+ Register Reg = Inst->getOperand(0).getReg();
+ if (!Reg.isVirtual())
+ continue;
+
+ for (MachineOperand &MO : MRI->reg_operands(Reg)) {
+ LLVM_DEBUG(dbgs() << "Register: " << MO.getReg() << "\n");
+ LLVM_DEBUG(dbgs() << "MO " << MO << " is EarlyClobber "
+ << isEarlyClobberMI(*MO.getParent()) << "\n");
+ if (isEarlyClobberMI(*MO.getParent())) {
+ LLVM_DEBUG(dbgs() << "MO " << &MO << " is use " << MO.isUse()
+ << " MO is tied " << MO.isTied() << "\n");
+ if (MO.isUse() && !MO.isTied()) {
+ LLVM_DEBUG(dbgs() << "MO " << MO << " added\n");
+ UseMOs.push_back(&MO);
+ } else
+ HasOtherUse = true;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "UseMOs " << &UseMOs << " is empty " << UseMOs.empty()
+ << "\n");
+ if (UseMOs.empty())
+ continue;
+
+ LLVM_DEBUG(
+ dbgs() << "Emitting PseudoARMInitUndef for implicit vector register "
+ << Reg << '\n'
+ << '\n');
+
+ const TargetRegisterClass *TargetRegClass =
+ getVRLargestSuperClass(MRI->getRegClass(Reg));
+ unsigned Opcode = getUndefInitOpcode(TargetRegClass->getID());
+
+ Register NewDest = Reg;
+ if (HasOtherUse) {
+ NewDest = MRI->createVirtualRegister(TargetRegClass);
+ NewRegs.insert(NewDest);
+ }
+ BuildMI(MBB, Inst, Inst->getDebugLoc(), TII->get(Opcode), NewDest);
+
+ if (!HasOtherUse)
+ Inst = MBB.erase(Inst);
+
+ for (auto *MO : UseMOs) {
+ MO->setReg(NewDest);
+ MO->setIsUndef(false);
+ }
+
+ Changed = true;
+ }
+ return Changed;
+}
+
+bool ARMInitUndef::isVectorRegClass(Register R) {
+ return ARM::MQPRRegClass.hasSubClassEq(MRI->getRegClass(R));
+}
+
+const TargetRegisterClass *
+ARMInitUndef::getVRLargestSuperClass(const TargetRegisterClass *RC) const {
+ if (ARM::MQPRRegClass.hasSubClassEq(RC))
+ return &ARM::MQPRRegClass;
+ return RC;
+}
+
+/* This will process a BasicBlock within the MachineFunction. Each instruction
+ * is checked for a vector-register IMPLICIT_DEF that needs dealing with. A use
+ * tied to the first def but with no register set is given a new IMPLICIT_DEF
+ * first, to ensure that the correct undef values are given a definition.
+ */
+bool ARMInitUndef::processBasicBlock(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ const DeadLaneDetector &DLD) {
+ bool Changed = false;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); I++) {
+ MachineInstr &MI = *I;
+
+ unsigned UseOpIdx;
+ if (MI.getNumDefs() != 0 && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
+ MachineOperand &UseMO = MI.getOperand(UseOpIdx);
+ if (UseMO.getReg() == ARM::NoRegister) {
+ const TargetRegisterClass *RC =
+ TII->getRegClass(MI.getDesc(), UseOpIdx, TRI, MF);
+ Register NewDest = MRI->createVirtualRegister(RC);
+
+ NewRegs.insert(NewDest);
+ BuildMI(MBB, I, I->getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
+ NewDest);
+ UseMO.setReg(NewDest);
+ Changed = true;
+ }
+ }
+
+ if (MI.isImplicitDef()) {
+ auto DstReg = MI.getOperand(0).getReg();
+ if (DstReg.isVirtual() && isVectorRegClass(DstReg))
+ Changed |= handleImplicitDef(MBB, I);
+ }
+ }
+ return Changed;
+}
+
+bool ARMInitUndef::runOnMachineFunction(MachineFunction &MF) {
+ ST = &MF.getSubtarget<ARMSubtarget>();
+ MRI = &MF.getRegInfo();
+ TII = ST->getInstrInfo();
+ TRI = MRI->getTargetRegisterInfo();
+
+ bool Changed = false;
+ DeadLaneDetector DLD(MRI, TRI);
+ DLD.computeSubRegisterLaneBitInfo();
+
+ for (MachineBasicBlock &BB : MF) {
+ Changed |= processBasicBlock(MF, BB, DLD);
+ }
+
+ return Changed;
+}
+
+FunctionPass *llvm::createARMInitUndefPass() { return new ARMInitUndef(); }
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 812b5730875d5d..2259c4fd391fcc 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -6532,3 +6532,11 @@ let isPseudo = 1 in {
let isTerminator = 1 in
def SEH_EpilogEnd : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
}
+
+//===----------------------------------------------------------------------===//
+// Pseudo Instructions for use when early-clobber is defined and Greedy Register
+// Allocation is used. This ensures the constraint is used properly.
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1 in {
+ def PseudoARMInitUndef : PseudoInst<(outs MQPR:$vd), (ins), NoItinerary, []>;
+}
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index a99773691df123..2498a0b6a6a18f 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -41,6 +41,7 @@
#include "llvm/IR/Function.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -111,6 +112,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
initializeMVELaneInterleavingPass(Registry);
initializeARMFixCortexA57AES1742098Pass(Registry);
initializeARMDAGToDAGISelPass(Registry);
+ initializeARMInitUndefPass(Registry);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -384,6 +386,7 @@ class ARMPassConfig : public TargetPassConfig {
void addPreSched2() override;
void addPreEmitPass() override;
void addPreEmitPass2() override;
+ void addOptimizedRegAlloc() override;
std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};
@@ -621,6 +624,13 @@ void ARMPassConfig::addPreEmitPass2() {
}
}
+void ARMPassConfig::addOptimizedRegAlloc() {
+ if (getOptimizeRegAlloc())
+ insertPass(&DetectDeadLanesID, &ARMInitUndefPass);
+
+ TargetPassConfig::addOptimizedRegAlloc();
+}
+
yaml::MachineFunctionInfo *
ARMBaseTargetMachine::createDefaultFuncInfoYAML() const {
return new yaml::ARMFunctionInfo();
diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt
index 3d6af28b437538..d52a42adb325b3 100644
--- a/llvm/lib/Target/ARM/CMakeLists.txt
+++ b/llvm/lib/Target/ARM/CMakeLists.txt
@@ -35,6 +35,7 @@ add_llvm_target(ARMCodeGen
ARMFixCortexA57AES1742098Pass.cpp
ARMFrameLowering.cpp
ARMHazardRecognizer.cpp
+ ARMInitUndef.cpp
ARMInstructionSelector.cpp
ARMISelDAGToDAG.cpp
ARMISelLowering.cpp
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index 5e565970fc3a86..310a82b26893e0 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -113,6 +113,7 @@
; CHECK-NEXT: ARM pre- register allocation load / store optimization pass
; CHECK-NEXT: ARM A15 S->D optimizer
; CHECK-NEXT: Detect Dead Lanes
+; CHECK-NEXT: ARM init undef pass
; CHECK-NEXT: Process Implicit Definitions
; CHECK-NEXT: Remove unreachable machine basic blocks
; CHECK-NEXT: Live Variable Analysis
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll
index 9bb24fc61ccef3..02234c63725360 100644
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll
@@ -699,6 +699,17 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot270_s32_undef() {
+; CHECK-LABEL: test_vhcaddq_rot270_s32_undef:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vhcadd.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #270
+; CHECK-NOT: vhcadd.s32 q[[REG:[0-9]+]], q{{[0-9]+}}, q[[REG]], #270
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 0, i32 1, <4 x i32> undef, <4 x i32> undef)
+ ret <4 x i32> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot90_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot90_x_s8:
; CHECK: @ %bb.0: @ %entry
``````````
</details>
https://github.com/llvm/llvm-project/pull/77770
More information about the llvm-commits
mailing list