[llvm] [ARM] Add pass for handling undef early-clobber values (PR #77770)

Jack Styles via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 11 05:50:53 PST 2024


https://github.com/Stylie777 created https://github.com/llvm/llvm-project/pull/77770

When using Greedy Register Allocation, there are times where early-clobber values are ignored, and assigned the same register. This is illeagal behaviour for these intructions. To get around this, using Pseudo instructions for early-clobber registers gives them a definition and allows Greedy to assign them to a different register. This then meets the ARM Architecture Reference Manual and matches the defined behaviour.

This patch takes a similar approach to the RISC-V pass added as part of #3b8c0b3 to fix issue 50157.

>From 401f22810225a288e417d62ac6f09ac4ffdc5d76 Mon Sep 17 00:00:00 2001
From: Jack Styles <jack.styles at arm.com>
Date: Thu, 11 Jan 2024 13:42:47 +0000
Subject: [PATCH] [ARM] Add pass for handling undef early-clobber values

When using Greedy Register Allocation, there are times where
early-clobber values are ignored, and assigned the same register.
This is illeagal behaviour for these intructions. To get around this,
using Pseudo instructions for early-clobber registers gives them a
definition and allows Greedy to assign them to a different register.
This then meets the ARM Architecture Reference Manual and
matches the defined behaviour.

This patch takes a similar approach to the RISC-V pass added as part
of #3b8c0b3 to fix issue 50157.
---
 llvm/lib/Target/ARM/ARM.h                     |   4 +
 llvm/lib/Target/ARM/ARMAsmPrinter.cpp         |   3 +
 llvm/lib/Target/ARM/ARMInitUndef.cpp          | 242 ++++++++++++++++++
 llvm/lib/Target/ARM/ARMInstrInfo.td           |   8 +
 llvm/lib/Target/ARM/ARMTargetMachine.cpp      |  10 +
 llvm/lib/Target/ARM/CMakeLists.txt            |   1 +
 llvm/test/CodeGen/ARM/O3-pipeline.ll          |   1 +
 .../CodeGen/Thumb2/mve-intrinsics/vcaddq.ll   |  11 +
 8 files changed, 280 insertions(+)
 create mode 100644 llvm/lib/Target/ARM/ARMInitUndef.cpp

diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h
index b96e0182298524..8f38ee370ab5ee 100644
--- a/llvm/lib/Target/ARM/ARM.h
+++ b/llvm/lib/Target/ARM/ARM.h
@@ -57,6 +57,7 @@ FunctionPass *createARMSLSHardeningPass();
 FunctionPass *createARMIndirectThunks();
 Pass *createMVELaneInterleavingPass();
 FunctionPass *createARMFixCortexA57AES1742098Pass();
+FunctionPass *createARMInitUndefPass();
 
 void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                   ARMAsmPrinter &AP);
@@ -67,6 +68,7 @@ void initializeARMConstantIslandsPass(PassRegistry &);
 void initializeARMDAGToDAGISelPass(PassRegistry &);
 void initializeARMExpandPseudoPass(PassRegistry &);
 void initializeARMFixCortexA57AES1742098Pass(PassRegistry &);
+void initializeARMInitUndefPass(PassRegistry &);
 void initializeARMLoadStoreOptPass(PassRegistry &);
 void initializeARMLowOverheadLoopsPass(PassRegistry &);
 void initializeARMParallelDSPPass(PassRegistry &);
@@ -80,6 +82,8 @@ void initializeMVEVPTBlockPass(PassRegistry &);
 void initializeThumb2ITBlockPass(PassRegistry &);
 void initializeThumb2SizeReducePass(PassRegistry &);
 
+extern char &ARMInitUndefPass;
+
 } // end namespace llvm
 
 #endif // LLVM_LIB_TARGET_ARM_ARM_H
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 15cda9b9432d5f..8e39e51a28e3e3 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -2409,6 +2409,9 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
   case ARM::SEH_EpilogEnd:
     ATS.emitARMWinCFIEpilogEnd();
     return;
+
+  case ARM::PseudoARMInitUndef:
+    return;
   }
 
   MCInst TmpInst;
diff --git a/llvm/lib/Target/ARM/ARMInitUndef.cpp b/llvm/lib/Target/ARM/ARMInitUndef.cpp
new file mode 100644
index 00000000000000..815abcc2f43660
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMInitUndef.cpp
@@ -0,0 +1,242 @@
+//===- ARMInitUndef.cpp - Initialize undef vector value to pseudo -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass runs to allow for Undef values to be given a definition in
+// early-clobber instructions. This can occur when using higher levels of
+// optimisations. Before, undef values were ignored and it would result in
+// early-clobber instructions using the same registeres for the output as one of
+// the inputs. This is an illegal operation according to the ARM Architecture
+// Reference Manual. This pass will check for early-clobber instructions and
+// give any undef values a defined Pseudo value to ensure that the early-clobber
+// rules are followed.
+//
+// Example: Without this pass, for the vhcadd instruction the following would
+// be generated: `vhcadd.s32 q0, q0, q0, #270`. This is an illegal instruction
+// as the output register and 2nd input register cannot match. By using this
+// pass, and using the Pseudo instruction, the following will be generated.
+// `vhcadd.s32 q0, q1, q2, #270`. This is allowed, as the output register and qd
+// input register are different.
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/ADT/bit.h"
+#include "llvm/CodeGen/DetectDeadLanes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/Debug.h"
+#include <cstddef>
+#include <optional>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-init-undef"
+#define ARM_INIT_UNDEF_NAME "ARM init undef pass"
+
+namespace {
+class ARMInitUndef : public MachineFunctionPass {
+  const TargetInstrInfo *TII;
+  MachineRegisterInfo *MRI;
+  const ARMSubtarget *ST;
+  const TargetRegisterInfo *TRI;
+
+  SmallSet<Register, 8> NewRegs;
+
+public:
+  static char ID;
+
+  ARMInitUndef() : MachineFunctionPass(ID) {
+    initializeARMInitUndefPass(*PassRegistry::getPassRegistry());
+  }
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override { return ARM_INIT_UNDEF_NAME; }
+
+private:
+  bool handleImplicitDef(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator &Inst);
+  bool isVectorRegClass(const Register R);
+  const TargetRegisterClass *
+  getVRLargestSuperClass(const TargetRegisterClass *RC) const;
+  bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
+                         const DeadLaneDetector &DLD);
+};
+} // end anonymous namespace
+
+char ARMInitUndef::ID = 0;
+INITIALIZE_PASS(ARMInitUndef, DEBUG_TYPE, ARM_INIT_UNDEF_NAME, false, false)
+char &llvm::ARMInitUndefPass = ARMInitUndef::ID;
+
+static bool isEarlyClobberMI(MachineInstr &MI) {
+  return llvm::any_of(MI.defs(), [](const MachineOperand &DefMO) {
+    return DefMO.isReg() && DefMO.isEarlyClobber();
+  });
+}
+
+static unsigned getUndefInitOpcode(unsigned RegClassID) {
+  if (RegClassID == ARM::MQPRRegClass.getID())
+    return ARM::PseudoARMInitUndef;
+
+  llvm_unreachable("Unexpected register class.");
+}
+
+/* handleImplicitDef is used to apply the definition to any undef values. This
+ * is only done for instructions that are early-clobber, and are not tied to
+ * other instructions. It will cycle through every MachineBasicBlock iterator
+ * that is an IMPLICIT_DEF then check for if it is early-clobber, not tied and
+ * is used. If all these are true, then the value is applied. This is then
+ * changed to be defined at ARM_INIT_UNDEF_PSEUDO
+ */
+bool ARMInitUndef::handleImplicitDef(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator &Inst) {
+  bool Changed = false;
+  Register LastReg;
+
+  while (Inst->getOpcode() == TargetOpcode::IMPLICIT_DEF &&
+         Inst->getOperand(0).getReg() != LastReg) {
+
+    bool HasOtherUse = false;
+    SmallVector<MachineOperand *, 1> UseMOs;
+    LastReg = Inst->getOperand(0).getReg();
+
+    Register Reg = Inst->getOperand(0).getReg();
+    if (!Reg.isVirtual())
+      continue;
+
+    for (MachineOperand &MO : MRI->reg_operands(Reg)) {
+      LLVM_DEBUG(dbgs() << "Register: " << MO.getReg() << "\n");
+      LLVM_DEBUG(dbgs() << "MO " << MO << " is EarlyClobber "
+                        << isEarlyClobberMI(*MO.getParent()) << "\n");
+      if (isEarlyClobberMI(*MO.getParent())) {
+        LLVM_DEBUG(dbgs() << "MO " << &MO << " is use " << MO.isUse()
+                          << " MO is tied " << MO.isTied() << "\n");
+        if (MO.isUse() && !MO.isTied()) {
+          LLVM_DEBUG(dbgs() << "MO " << MO << " added\n");
+          UseMOs.push_back(&MO);
+        } else
+          HasOtherUse = true;
+      }
+    }
+    LLVM_DEBUG(dbgs() << "UseMOs " << &UseMOs << " is empty " << UseMOs.empty()
+                      << "\n");
+    if (UseMOs.empty())
+      continue;
+
+    LLVM_DEBUG(
+        dbgs() << "Emitting PseudoARMInitUndef for implicit vector register "
+               << Reg << '\n'
+               << '\n');
+
+    const TargetRegisterClass *TargetRegClass =
+        getVRLargestSuperClass(MRI->getRegClass(Reg));
+    unsigned Opcode = getUndefInitOpcode(TargetRegClass->getID());
+
+    Register NewDest = Reg;
+    if (HasOtherUse) {
+      NewDest = MRI->createVirtualRegister(TargetRegClass);
+      NewRegs.insert(NewDest);
+    }
+    BuildMI(MBB, Inst, Inst->getDebugLoc(), TII->get(Opcode), NewDest);
+
+    if (!HasOtherUse)
+      Inst = MBB.erase(Inst);
+
+    for (auto *MO : UseMOs) {
+      MO->setReg(NewDest);
+      MO->setIsUndef(false);
+    }
+
+    Changed = true;
+  }
+  return Changed;
+}
+
+bool ARMInitUndef::isVectorRegClass(Register R) {
+  return ARM::MQPRRegClass.hasSubClassEq(MRI->getRegClass(R));
+}
+
+const TargetRegisterClass *
+ARMInitUndef::getVRLargestSuperClass(const TargetRegisterClass *RC) const {
+  if (ARM::MQPRRegClass.hasSubClassEq(RC))
+    return &ARM::MQPRRegClass;
+  return RC;
+}
+
+/* This will process a BasicBlock within the MachineFunction. This will take
+ * each iterator and determine if there is an Implicit Def that needs dealing
+ * with. This also deals with implicit def's that are tied to an operand, to
+ * ensure that the correct undef values are given a definition.
+ */
+bool ARMInitUndef::processBasicBlock(MachineFunction &MF,
+                                     MachineBasicBlock &MBB,
+                                     const DeadLaneDetector &DLD) {
+  bool Changed = false;
+
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); I++) {
+    MachineInstr &MI = *I;
+
+    unsigned UseOpIdx;
+    if (MI.getNumDefs() != 0 && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
+      MachineOperand &UseMO = MI.getOperand(UseOpIdx);
+      if (UseMO.getReg() == ARM::NoRegister) {
+        const TargetRegisterClass *RC =
+            TII->getRegClass(MI.getDesc(), UseOpIdx, TRI, MF);
+        Register NewDest = MRI->createVirtualRegister(RC);
+
+        NewRegs.insert(NewDest);
+        BuildMI(MBB, I, I->getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
+                NewDest);
+        UseMO.setReg(NewDest);
+        Changed = true;
+      }
+    }
+
+    if (MI.isImplicitDef()) {
+      auto DstReg = MI.getOperand(0).getReg();
+      if (DstReg.isVirtual() && isVectorRegClass(DstReg))
+        Changed |= handleImplicitDef(MBB, I);
+    }
+  }
+  return Changed;
+}
+
+bool ARMInitUndef::runOnMachineFunction(MachineFunction &MF) {
+  ST = &MF.getSubtarget<ARMSubtarget>();
+  MRI = &MF.getRegInfo();
+  TII = ST->getInstrInfo();
+  TRI = MRI->getTargetRegisterInfo();
+
+  bool Changed = false;
+  DeadLaneDetector DLD(MRI, TRI);
+  DLD.computeSubRegisterLaneBitInfo();
+
+  for (MachineBasicBlock &BB : MF) {
+    Changed |= processBasicBlock(MF, BB, DLD);
+  }
+
+  return Changed;
+}
+
+FunctionPass *llvm::createARMInitUndefPass() { return new ARMInitUndef(); }
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 812b5730875d5d..2259c4fd391fcc 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -6532,3 +6532,11 @@ let isPseudo = 1 in {
   let isTerminator = 1 in
   def SEH_EpilogEnd : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
 }
+
+//===----------------------------------------------------------------------===//
+// Pseudo Instructions for use when early-clobber is defined and Greedy Register
+// Allocation is used. This ensures the constraint is used properly.
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1 in {
+  def PseudoARMInitUndef :  PseudoInst<(outs MQPR:$vd), (ins), NoItinerary, []>;
+}
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index a99773691df123..2498a0b6a6a18f 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -41,6 +41,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -111,6 +112,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
   initializeMVELaneInterleavingPass(Registry);
   initializeARMFixCortexA57AES1742098Pass(Registry);
   initializeARMDAGToDAGISelPass(Registry);
+  initializeARMInitUndefPass(Registry);
 }
 
 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -384,6 +386,7 @@ class ARMPassConfig : public TargetPassConfig {
   void addPreSched2() override;
   void addPreEmitPass() override;
   void addPreEmitPass2() override;
+  void addOptimizedRegAlloc() override;
 
   std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
 };
@@ -621,6 +624,13 @@ void ARMPassConfig::addPreEmitPass2() {
   }
 }
 
+void ARMPassConfig::addOptimizedRegAlloc() {
+  if (getOptimizeRegAlloc())
+    insertPass(&DetectDeadLanesID, &ARMInitUndefPass);
+
+  TargetPassConfig::addOptimizedRegAlloc();
+}
+
 yaml::MachineFunctionInfo *
 ARMBaseTargetMachine::createDefaultFuncInfoYAML() const {
   return new yaml::ARMFunctionInfo();
diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt
index 3d6af28b437538..d52a42adb325b3 100644
--- a/llvm/lib/Target/ARM/CMakeLists.txt
+++ b/llvm/lib/Target/ARM/CMakeLists.txt
@@ -35,6 +35,7 @@ add_llvm_target(ARMCodeGen
   ARMFixCortexA57AES1742098Pass.cpp
   ARMFrameLowering.cpp
   ARMHazardRecognizer.cpp
+  ARMInitUndef.cpp
   ARMInstructionSelector.cpp
   ARMISelDAGToDAG.cpp
   ARMISelLowering.cpp
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index 5e565970fc3a86..310a82b26893e0 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -113,6 +113,7 @@
 ; CHECK-NEXT:      ARM pre- register allocation load / store optimization pass
 ; CHECK-NEXT:      ARM A15 S->D optimizer
 ; CHECK-NEXT:      Detect Dead Lanes
+; CHECK-NEXT:      ARM init undef pass
 ; CHECK-NEXT:      Process Implicit Definitions
 ; CHECK-NEXT:      Remove unreachable machine basic blocks
 ; CHECK-NEXT:      Live Variable Analysis
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll
index 9bb24fc61ccef3..02234c63725360 100644
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll
@@ -699,6 +699,17 @@ entry:
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot270_s32_undef() {
+; CHECK-LABEL: test_vhcaddq_rot270_s32_undef:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT: vhcadd.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #270
+; CHECK-NOT:  vhcadd.s32 q[[REG:[0-9]+]], q{{[0-9]+}}, q[[REG]], #270
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 0, i32 1, <4 x i32> undef, <4 x i32> undef)
+  ret <4 x i32> %0
+}
+
 define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot90_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
 ; CHECK-LABEL: test_vhcaddq_rot90_x_s8:
 ; CHECK:       @ %bb.0: @ %entry



More information about the llvm-commits mailing list