[llvm-branch-commits] [llvm] release/22.x: [RISCV] Add missing COPY elimination when folding vmerge into mask (#176077) (PR #176432)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jan 16 09:18:54 PST 2026
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/176432
Backport 6f69d68a9ef4a4e3fae634a80f24ea5d77e1fd45
Requested by: @mshockwave
>From 2a4ce5961b03345bdf8d62f36a26a9c9764fec0b Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Fri, 16 Jan 2026 09:07:21 -0800
Subject: [PATCH] [RISCV] Add missing COPY elimination when folding vmerge into
mask (#176077)
Found in #176001 (but unfortunately unrelated to its miscompilation),
the following snippet
```
early-clobber %102:vrm8 = PseudoVZEXT_VF8_M8 undef $noreg, killed undef %326, 16, 6 /* e64 */, 3 /* ta, ma */
%123:vrm8nov0 = COPY %102
%124:vmv0 = IMPLICIT_DEF
%121:vrm8nov0 = PseudoVMERGE_VVM_M8 undef $noreg, killed undef %327, killed undef %123, undef %124, 16, 6 /* e64 */
%125:vrm8 = COPY killed %121
BEQ killed undef %325, $x0, %bb.8
PseudoBR %bb.7
```
is turned into
```
%123:vrm8nov0 = COPY %121:vrm8nov0
%124:vmv0 = IMPLICIT_DEF
early-clobber %121:vrm8nov0 = PseudoVZEXT_VF8_M8_MASK undef %327:vrm8nov0(tied-def 0), killed undef %326:vr, %124:vmv0, 16, 6, 1
%125:vrm8 = COPY killed %121:vrm8nov0
BEQ killed undef %325:gpr, $x0, %bb.8
PseudoBR %bb.7
```
by RISC-V Vector Peephole's vmerge folding. This is problematic because
`%121` is used before its definition. This was caused by the fact that
the vector peephole tries to sink the new instruction -- in this case
`PseudoVZEXT_VF8_M8_MASK` -- until it's dominated by the mask. But we
forgot to sink all the COPYs of its result, like `%123`, as well.
This patch fixes this by removing those COPYs after the folding, as all
of their users should be dead at that moment.
(cherry picked from commit 6f69d68a9ef4a4e3fae634a80f24ea5d77e1fd45)
---
llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 27 ++++++++++++----
.../RISCV/rvv/rvv-peephole-vmerge-vops.ll | 3 +-
.../CodeGen/RISCV/rvv/vmerge-peephole.mir | 32 ++++++++++++++++++-
3 files changed, 52 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index a5385be0c011c..b00244af1a875 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -73,7 +73,9 @@ class RISCVVectorPeephole : public MachineFunctionPass {
bool isAllOnesMask(const MachineInstr *MaskDef) const;
std::optional<unsigned> getConstant(const MachineOperand &VL) const;
bool ensureDominates(const MachineOperand &Use, MachineInstr &Src) const;
- Register lookThruCopies(Register Reg, bool OneUseOnly = false) const;
+ Register
+ lookThruCopies(Register Reg, bool OneUseOnly = false,
+ SmallVectorImpl<MachineInstr *> *Copies = nullptr) const;
};
} // namespace
@@ -389,8 +391,9 @@ bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const {
// If \p Reg is defined by one or more COPYs of virtual registers, traverses
// the chain and returns the root non-COPY source.
-Register RISCVVectorPeephole::lookThruCopies(Register Reg,
- bool OneUseOnly) const {
+Register RISCVVectorPeephole::lookThruCopies(
+ Register Reg, bool OneUseOnly,
+ SmallVectorImpl<MachineInstr *> *Copies) const {
while (MachineInstr *Def = MRI->getUniqueVRegDef(Reg)) {
if (!Def->isFullCopy())
break;
@@ -399,6 +402,8 @@ Register RISCVVectorPeephole::lookThruCopies(Register Reg,
break;
if (OneUseOnly && !MRI->hasOneNonDBGUse(Reg))
break;
+ if (Copies)
+ Copies->push_back(Def);
Reg = Src;
}
return Reg;
@@ -735,10 +740,12 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const {
if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMERGE_VVM)
return false;
+ // Collect chain of COPYs on True's result for later cleanup.
+ SmallVector<MachineInstr *, 4> TrueCopies;
Register PassthruReg = lookThruCopies(MI.getOperand(1).getReg());
Register FalseReg = lookThruCopies(MI.getOperand(2).getReg());
- Register TrueReg =
- lookThruCopies(MI.getOperand(3).getReg(), /*OneUseOnly=*/true);
+ Register TrueReg = lookThruCopies(MI.getOperand(3).getReg(),
+ /*OneUseOnly=*/true, &TrueCopies);
if (!TrueReg.isVirtual() || !MRI->hasOneUse(TrueReg))
return false;
MachineInstr &True = *MRI->getUniqueVRegDef(TrueReg);
@@ -821,8 +828,9 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const {
assert(RISCVII::hasVecPolicyOp(True.getDesc().TSFlags) &&
"Foldable unmasked pseudo should have a policy op already");
- // Make sure the mask dominates True, otherwise move down True so it does.
- // VL will always dominate since if it's a register they need to be the same.
+ // Make sure the mask dominates True and its copies, otherwise move down True
+ // so it does. VL will always dominate since if it's a register they need to
+ // be the same.
if (!ensureDominates(MaskOp, True))
return false;
@@ -861,6 +869,11 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const {
MRI->clearKillFlags(FalseReg);
MI.eraseFromParent();
+ // Cleanup all the COPYs on True's value. We have to manually do this because
+ // sometimes sinking True causes these COPY to be invalid (use before define).
+ for (MachineInstr *TrueCopy : TrueCopies)
+ TrueCopy->eraseFromParent();
+
return true;
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index 5be32cc35fe37..acd9519bb5a8e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -867,9 +867,8 @@ define void @test_dag_loop() {
; CHECK-NEXT: vmseq.vv v0, v12, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vsetivli zero, 1, e16, m8, tu, mu
+; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, mu
; CHECK-NEXT: vle16.v v8, (zero), v0.t
-; CHECK-NEXT: vsetivli zero, 0, e16, m8, ta, ma
; CHECK-NEXT: vse16.v v8, (zero)
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir
index bc78a7732c15a..98b193f24d7c8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir
@@ -128,7 +128,6 @@ body: |
; CHECK-NEXT: %passthru:vrnov0 = COPY $v8
; CHECK-NEXT: %mask:vmv0 = COPY $v0
; CHECK-NEXT: %z:vrnov0 = PseudoVLE32_V_M1_MASK %passthru, $noreg, %mask, %avl, 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size, align 1)
- ; CHECK-NEXT: %y:vrnov0 = COPY %z
%avl:gprnox0 = COPY $x8
%passthru:vrnov0 = COPY $v8
%x:vr = PseudoVLE32_V_M1 $noreg, $noreg, %avl, 5 /* e32 */, 2 /* tu, ma */ :: (load unknown-size)
@@ -181,3 +180,34 @@ body: |
%mask:vmv0 = COPY $v0
PseudoVSE8_V_M1 %copy, $noreg, %avl, 5 /* e8 */
%y:vrnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, %copy, %mask, %avl, 5 /* e32 */
+...
+---
+name: true_copy_elimination
+body: |
+ ; CHECK-LABEL: name: true_copy_elimination
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: PseudoBR %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
+ ; CHECK-NEXT: early-clobber %5:vrm8nov0 = PseudoVZEXT_VF8_M8_MASK $noreg, $noreg, [[DEF]], 16, 6 /* e64 */, 1 /* ta, mu */
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vrm8 = COPY %5
+ ; CHECK-NEXT: PseudoRET
+ bb.0:
+ successors: %bb.1
+
+ PseudoBR %bb.1
+
+ bb.1:
+
+ %102:vrm8 = PseudoVZEXT_VF8_M8 $noreg, $noreg, 16, 6 /* e64 */, 3 /* ta, ma */
+ %123:vrm8nov0 = COPY %102
+ %a123:vrm8 = COPY %123
+ %b123:vrm8nov0 = COPY %a123
+ %124:vmv0 = IMPLICIT_DEF
+ %121:vrm8nov0 = PseudoVMERGE_VVM_M8 $noreg, $noreg, %b123, undef %124, 16, 6 /* e64 */
+ %125:vrm8 = COPY %121
+ PseudoRET
+...
More information about the llvm-branch-commits
mailing list