[llvm] [RISCV] Select mask operands as virtual registers and eliminate vmv0 (PR #125026)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 3 00:18:06 PST 2025
================
@@ -0,0 +1,154 @@
+//===- RISCVVMV0Elimination.cpp - VMV0 Elimination -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// Mask operands in vector pseudos have to be in v0. We select them as a virtual
+// register in the singleton vmv0 register class instead of copying them to $v0
+// straight away, to make optimizing masks easier.
+//
+// However, the register allocator struggles with singleton register classes and
+// will run into errors like "ran out of registers during register allocation in
+// function".
+//
+// This pass runs just before register allocation and replaces any uses* of vmv0
+// with copies to $v0.
+//
+// %x:vrnov0 = PseudoVADD_VV_M1_MASK %0:vrnov0, %1:vr, %2:vr, %3:vmv0, ...
+// ->
+// $v0 = COPY %3:vr
+// %x:vrnov0 = PseudoVADD_VV_M1_MASK %0:vrnov0, %1:vr, %2:vr, $v0, ...
+//
+// * The only uses of vmv0 left behind are when used for inline asm with the vm
+// constraint.
+//
+//===---------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#ifndef NDEBUG
+#include "llvm/ADT/PostOrderIterator.h"
+#endif
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-vmv0-elimination"
+
+namespace {
+
+class RISCVVMV0Elimination : public MachineFunctionPass {
+public:
+ static char ID;
+ RISCVVMV0Elimination() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ // TODO: We could move this closer to regalloc, out of SSA, which would
+ // allow scheduling past mask operands. We would need to preserve live
+ // intervals.
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+};
+
+} // namespace
+
+char RISCVVMV0Elimination::ID = 0;
+
+INITIALIZE_PASS(RISCVVMV0Elimination, DEBUG_TYPE, "RISC-V VMV0 Elimination",
+ false, false)
+
+FunctionPass *llvm::createRISCVVMV0EliminationPass() {
+ return new RISCVVMV0Elimination();
+}
+
+bool RISCVVMV0Elimination::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ // Skip if the vector extension is not enabled.
+ const RISCVSubtarget *ST = &MF.getSubtarget<RISCVSubtarget>();
+ if (!ST->hasVInstructions())
+ return false;
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ const TargetInstrInfo *TII = ST->getInstrInfo();
+
+ auto IsVMV0 = [](const MCOperandInfo &MCOI) {
+ return MCOI.RegClass == RISCV::VMV0RegClassID;
+ };
+
+#ifndef NDEBUG
+ // Assert that we won't clobber any existing reads of V0 where we need to
+ // insert copies.
+ ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+ SmallPtrSet<MachineBasicBlock *, 8> V0ClobberedOnEntry;
+ for (MachineBasicBlock *MBB : RPOT) {
+ bool V0Clobbered = V0ClobberedOnEntry.contains(MBB);
+ for (MachineInstr &MI : *MBB) {
+ assert(!(MI.readsRegister(RISCV::V0, TRI) && V0Clobbered));
+ if (MI.modifiesRegister(RISCV::V0, TRI))
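(Editorial aside, not part of the quoted patch: the rest of the hunk is cut off above, but the rewrite described in the file's header comment amounts to something like the sketch below. The loop structure is an assumption for illustration only; RISCV::V0, RISCV::VMV0RegClassID, BuildMI and the MachineInstr/MCInstrDesc APIs are real, and MF/TII are the locals already defined in runOnMachineFunction. BuildMI would additionally need llvm/CodeGen/MachineInstrBuilder.h, which the elided part of the patch presumably includes.)

```c++
// Hedged sketch: for every operand whose register class is the singleton
// vmv0 class, insert a COPY into the physical $v0 right before the
// instruction and rewrite the operand to read $v0 instead.
for (MachineBasicBlock &MBB : MF) {
  for (MachineInstr &MI : MBB) {
    const MCInstrDesc &Desc = MI.getDesc();
    for (unsigned OpNo = 0; OpNo < Desc.getNumOperands(); ++OpNo) {
      if (Desc.operands()[OpNo].RegClass != RISCV::VMV0RegClassID)
        continue;
      MachineOperand &MO = MI.getOperand(OpNo);
      assert(MO.isReg() && MO.isUse() && "expected a vmv0 register use");
      // $v0 = COPY %mask
      BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
              RISCV::V0)
          .addReg(MO.getReg());
      MO.setReg(RISCV::V0);
    }
  }
}
```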
----------------
lukel97 wrote:
I added a test case for this in vmv0-elimination.ll, but couldn't get the assertion to trigger.
It looks like any inline asm that reads v0 (i.e. has a v0 input constraint; the vm constraint isn't affected here) will end up with a glued COPY to $v0 emitted before it:
```
bb.0 (%ir-block.0):
liveins: $v8, $v9, $v0, $x10
%3:gpr = COPY $x10
%2:vr = COPY $v0
%1:vr = COPY $v9
%0:vr = COPY $v8
%4:vr = COPY %0:vr
%5:vr = COPY %1:vr
INLINEASM &"vadd.vv $0, $1, $2" [attdialect], $0:[regdef], implicit-def $v0, $1:[reguse:VR], %4:vr, $2:[reguse:VR], %5:vr
%8:vr = COPY $v0
%10:vmv0 = COPY %2:vr
%9:vrnov0 = PseudoVADD_VV_M1_MASK $noreg(tied-def 0), %0:vr, %1:vr, %10:vmv0, -1, 6, 0
PseudoVSE64_V_M1 killed %9:vrnov0, %3:gpr, -1, 6 :: (store (<vscale x 1 x s64>) into %ir.p)
%7:vr = COPY %0:vr
$v0 = COPY %8:vr
INLINEASM &"vadd.vv $0, $1, $2" [attdialect], $0:[regdef:VR], def %6:vr, $1:[reguse:VR], %7:vr, $2:[reguse], $v0
$v8 = COPY %6:vr
PseudoRET implicit $v8
```
So V0 shouldn't end up clobbered at any inline asm that reads it. Should we maintain that stricter invariant here, then?
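(Editorial aside: a sketch only, not part of the patch, of one way that stricter invariant could be spelled out. The helper name and the block-local scan for the nearest preceding def are assumptions; MachineInstr::modifiesRegister, isCopy, and RISCV::V0 are real. The idea is that every reader of V0 should see it defined by a plain COPY, e.g. the glued copy in the dump above, rather than merely not-yet-clobbered.)

```c++
// Sketch of the stricter check: the nearest V0 def preceding a reader in the
// same block must be a plain COPY into $v0 (or V0 must be live-in).
static bool v0ReadHasPrecedingCopy(const MachineInstr &Reader,
                                   const TargetRegisterInfo *TRI) {
  const MachineInstr *LastV0Def = nullptr;
  for (const MachineInstr &MI : *Reader.getParent()) {
    if (&MI == &Reader)
      break;
    if (MI.modifiesRegister(RISCV::V0, TRI))
      LastV0Def = &MI;
  }
  // No local def means V0 is live-in to the block; otherwise the nearest def
  // must be a COPY rather than an arbitrary clobber.
  return !LastV0Def || LastV0Def->isCopy();
}
```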
https://github.com/llvm/llvm-project/pull/125026