[llvm] 7ba6768 - Revert "[RISCV] Update V0Defs after moving Src in peepholes (#107359)"
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 9 22:26:25 PDT 2024
Author: Luke Lau
Date: 2024-09-10T13:26:07+08:00
New Revision: 7ba6768df8181bc270763333969d4a1d6cc2e160
URL: https://github.com/llvm/llvm-project/commit/7ba6768df8181bc270763333969d4a1d6cc2e160
DIFF: https://github.com/llvm/llvm-project/commit/7ba6768df8181bc270763333969d4a1d6cc2e160.diff
LOG: Revert "[RISCV] Update V0Defs after moving Src in peepholes (#107359)"
This revert fixes #107950 and adds a test case for it. The issue was due to
us incorrectly assuming that we stored a V0Defs entry for every single
instruction.
We actually only store them for instructions that use V0, so when we
updated the V0Def after moving we sometimes ended up copying nullptr
over from an instruction that doesn't use V0 and clearing the V0Def
entry inadvertently.
Because we don't have V0Defs on instructions that don't use V0, the
FIXME was never actually needed in the first place since the
bookkeeping wasn't out of sync to begin with.
That commit also mentioned that a future unmasked-to-masked pseudo
peephole might need unmasked pseudos to have V0Defs entries, but after
working on this locally it turns out we don't need them.
This reverts commit ce3648094d44e8c098396a353b215acecb363cda.
Added:
llvm/test/CodeGen/RISCV/rvv/pr107950.ll
Modified:
llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index 384eb9d7b94642..298f3317bf61ac 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -61,7 +61,7 @@ class RISCVVectorPeephole : public MachineFunctionPass {
}
private:
- bool tryToReduceVL(MachineInstr &MI);
+ bool tryToReduceVL(MachineInstr &MI) const;
bool convertToVLMAX(MachineInstr &MI) const;
bool convertToWholeRegister(MachineInstr &MI) const;
bool convertToUnmasked(MachineInstr &MI) const;
@@ -73,7 +73,7 @@ class RISCVVectorPeephole : public MachineFunctionPass {
bool hasSameEEW(const MachineInstr &User, const MachineInstr &Src) const;
bool isAllOnesMask(const MachineInstr *MaskDef) const;
std::optional<unsigned> getConstant(const MachineOperand &VL) const;
- bool ensureDominates(const MachineOperand &Use, MachineInstr &Src);
+ bool ensureDominates(const MachineOperand &Use, MachineInstr &Src) const;
/// Maps uses of V0 to the corresponding def of V0.
DenseMap<const MachineInstr *, const MachineInstr *> V0Defs;
@@ -116,7 +116,7 @@ bool RISCVVectorPeephole::hasSameEEW(const MachineInstr &User,
// Attempt to reduce the VL of an instruction whose sole use is feeding a
// instruction with a narrower VL. This currently works backwards from the
// user instruction (which might have a smaller VL).
-bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) {
+bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
// Note that the goal here is a bit multifaceted.
// 1) For store's reducing the VL of the value being stored may help to
// reduce VL toggles. This is somewhat of an artifact of the fact we
@@ -526,18 +526,16 @@ static bool dominates(MachineBasicBlock::const_iterator A,
/// does. Returns false if doesn't dominate and we can't move. \p MO must be in
/// the same basic block as \Src.
bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO,
- MachineInstr &Src) {
+ MachineInstr &Src) const {
assert(MO.getParent()->getParent() == Src.getParent());
if (!MO.isReg() || MO.getReg() == RISCV::NoRegister)
return true;
MachineInstr *Def = MRI->getVRegDef(MO.getReg());
if (Def->getParent() == Src.getParent() && !dominates(Def, Src)) {
- MachineInstr *AfterDef = Def->getNextNode();
- if (!isSafeToMove(Src, *AfterDef))
+ if (!isSafeToMove(Src, *Def->getNextNode()))
return false;
- V0Defs[&Src] = V0Defs[AfterDef];
- Src.moveBefore(AfterDef);
+ Src.moveBefore(Def->getNextNode());
}
return true;
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr107950.ll b/llvm/test/CodeGen/RISCV/rvv/pr107950.ll
new file mode 100644
index 00000000000000..8384008c245fc2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/pr107950.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+target triple = "riscv64-unknown-linux-gnu"
+
+define void @m(<vscale x 4 x i1> %0) #0 {
+; CHECK-LABEL: m:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vlse32.v v8, (zero), zero, v0.t
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vse32.v v8, (zero)
+; CHECK-NEXT: ret
+entry:
+ %broadcast.splatinsert184 = insertelement <vscale x 4 x ptr> zeroinitializer, ptr null, i64 0
+ %broadcast.splat185 = shufflevector <vscale x 4 x ptr> %broadcast.splatinsert184, <vscale x 4 x ptr> zeroinitializer, <vscale x 4 x i32> zeroinitializer
+ %wide.masked.gather186 = tail call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %broadcast.splat185, i32 4, <vscale x 4 x i1> %0, <vscale x 4 x i32> zeroinitializer)
+ %predphi187 = select <vscale x 4 x i1> %0, <vscale x 4 x i32> %wide.masked.gather186, <vscale x 4 x i32> zeroinitializer
+ %1 = extractelement <vscale x 4 x i32> %predphi187, i32 0
+ store i32 %1, ptr null, align 4
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr>, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i32>) #1
+
+attributes #0 = { "target-features"="+64bit,+d,+f,+relax,+v,+xsifivecdiscarddlone,+zicsr,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-a,-b,-c,-e,-experimental-smctr,-experimental-smmpm,-experimental-smnpm,-experimental-ssctr,-experimental-ssnpm,-experimental-sspm,-experimental-supm,-experimental-zacas,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-h,-m,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smepmp,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-za64rs,-zaamo,-zabha,-zalrsc,-zama16b,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccif,-zicclsm,-ziccrse,-zicntr,-zicond,-zifencei,-zihintntl,-zihintpause,-zihpm,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-ztso,-zvbb,-zvbc,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(read) }
More information about the llvm-commits
mailing list