[llvm] [RISCV] Set CostPerUse to 1 for registers that contain v0 (PR #174372)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 5 01:00:08 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Pengcheng Wang (wangpc-pp)
<details>
<summary>Changes</summary>
This reduces the use of v0.
`v0` is special as it is used as the mask register. According to
the Software Optimization Guide[^1], we should avoid using v0 for non-mask operations.
1. https://riscv-optimization-guide.riseproject.dev/#_avoid_using_v0_for_non_mask_operations
---
Patch is 366.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/174372.diff
42 Files Affected:
- (modified) llvm/include/llvm/CodeGen/TargetRegisterInfo.h (+6)
- (modified) llvm/lib/CodeGen/RegAllocGreedy.cpp (+1-1)
- (modified) llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h (+4)
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfo.cpp (+1-2)
- (modified) llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp (+16)
- (modified) llvm/lib/Target/RISCV/RISCVRegisterInfo.h (+3)
- (modified) llvm/lib/Target/RISCV/RISCVRegisterInfo.td (+6-1)
- (modified) llvm/test/CodeGen/RISCV/pr69586.ll (+26-14)
- (modified) llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll (+36-36)
- (modified) llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll (+53-15)
- (modified) llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll (+4-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll (+4-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll (+7-7)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll (+40-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll (+52-52)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll (+6-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll (+6-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll (+17-17)
- (modified) llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll (+78-92)
- (modified) llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll (+78-92)
- (modified) llvm/test/CodeGen/RISCV/rvv/nontemporal-vp-scalable.ll (+330-230)
- (modified) llvm/test/CodeGen/RISCV/rvv/remat.ll (+51-38)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll (+8-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll (+78-78)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll (+114-114)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll (+16-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll (+8-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll (+504-258)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll (+120-86)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll (+132-78)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll (+88-46)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll (+118-54)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll (+4-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll (+6-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll (+12-12)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll (+17-17)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll (+32-32)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll (+37-37)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll (+8-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vp-splice.ll (+8-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll (+166-210)
- (modified) llvm/test/CodeGen/RISCV/rvv/zvqdotq-sdnode.ll (+23-22)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 35b14e8b8fd30..9d531372bc963 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -806,6 +806,12 @@ class LLVM_ABI TargetRegisterInfo : public MCRegisterInfo {
return nullptr;
}
+ /// Return true if we should add extra register cost when the cost is not 0.
+ virtual unsigned shouldAddExtraCost(const LiveInterval &LI,
+ const MachineRegisterInfo *MRI) const {
+ return true;
+ }
+
protected:
/// Overridden by TableGen in targets that have sub-registers.
virtual unsigned composeSubRegIndicesImpl(unsigned, unsigned) const {
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index a059cb55371a3..33e1e90f281b2 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -573,7 +573,7 @@ MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
uint8_t Cost = RegCosts[PhysReg.id()];
// Most registers have 0 additional cost.
- if (!Cost)
+ if (!Cost || !TRI->shouldAddExtraCost(VirtReg, MRI))
return PhysReg;
LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index a047324ef36fa..3f20160793ffc 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -219,6 +219,10 @@ static inline bool elementsDependOnMask(uint64_t TSFlags) {
return TSFlags & ElementsDependOnMaskMask;
}
+static inline unsigned getDestEEW(uint64_t TSFlags) {
+ return (TSFlags & DestEEWMask) >> DestEEWShift;
+}
+
/// \returns true if the instruction may read elements past VL, e.g.
/// vslidedown/vrgather
static inline bool readsPastVL(uint64_t TSFlags) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 400b680a3ff12..af36601578f6d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -5097,8 +5097,7 @@ unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) {
}
unsigned RISCV::getDestLog2EEW(const MCInstrDesc &Desc, unsigned Log2SEW) {
- unsigned DestEEW =
- (Desc.TSFlags & RISCVII::DestEEWMask) >> RISCVII::DestEEWShift;
+ unsigned DestEEW = RISCVII::getDestEEW(Desc.TSFlags);
// EEW = 1
if (DestEEW == 0)
return 0;
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index e69dbef3d1f71..242e90ee3381b 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -11,12 +11,16 @@
//===----------------------------------------------------------------------===//
#include "RISCVRegisterInfo.h"
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "RISCV.h"
+#include "RISCVInstrInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
@@ -843,6 +847,18 @@ void RISCVRegisterInfo::getOffsetOpcodes(const StackOffset &Offset,
}
}
+unsigned
+RISCVRegisterInfo::shouldAddExtraCost(const LiveInterval &LI,
+ const MachineRegisterInfo *MRI) const {
+ if (const MachineInstr *MI = MRI->getUniqueVRegDef(LI.reg())) {
+ unsigned TSFlags = MI->getDesc().TSFlags;
+ // Only add the extra cost when DestEEW is not 1 (which means it is not a
+ // mask instruction).
+ return !(RISCVII::hasSEWOp(TSFlags) && RISCVII::getDestEEW(TSFlags) != 0);
+ }
+ return true;
+}
+
unsigned
RISCVRegisterInfo::getRegisterCostTableIndex(const MachineFunction &MF) const {
return MF.getSubtarget<RISCVSubtarget>().hasStdExtZca() && !DisableCostPerUse
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index f29f85e4987f6..d93515a7f24bc 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -141,6 +141,9 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
void getOffsetOpcodes(const StackOffset &Offset,
SmallVectorImpl<uint64_t> &Ops) const override;
+ unsigned shouldAddExtraCost(const LiveInterval &LI,
+ const MachineRegisterInfo *MRI) const override;
+
unsigned getRegisterCostTableIndex(const MachineFunction &MF) const override;
float getSpillWeightScaleFactor(const TargetRegisterClass *RC) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index e3657badfa9a4..8eb38eb6b8f8b 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -689,7 +689,9 @@ class VRegList<list<dag> LIn, int start, int nf, int lmul, bit isV0> {
//===----------------------------------------------------------------------===//
foreach Index = !range(0, 32, 1) in {
- def V#Index : RISCVReg<Index, "v"#Index>, DwarfRegNum<[!add(Index, 96)]>;
+ def V#Index : RISCVReg<Index, "v"#Index>, DwarfRegNum<[!add(Index, 96)]> {
+ let CostPerUse = [!if(!eq(Index, 0), 1, 0)];
+ }
}
foreach Index = !range(0, 32, 2) in {
@@ -698,6 +700,7 @@ foreach Index = !range(0, 32, 2) in {
!cast<Register>("V"#!add(Index, 1))]>,
DwarfRegAlias<!cast<Register>("V"#Index)> {
let SubRegIndices = [sub_vrm1_0, sub_vrm1_1];
+ let CostPerUse = [!if(!eq(Index, 0), 1, 0)];
}
}
@@ -707,6 +710,7 @@ foreach Index = !range(0, 32, 4) in {
!cast<Register>("V"#!add(Index, 2)#"M2")]>,
DwarfRegAlias<!cast<Register>("V"#Index)> {
let SubRegIndices = [sub_vrm2_0, sub_vrm2_1];
+ let CostPerUse = [!if(!eq(Index, 0), 1, 0)];
}
}
@@ -716,6 +720,7 @@ foreach Index = !range(0, 32, 8) in {
!cast<Register>("V"#!add(Index, 4)#"M4")]>,
DwarfRegAlias<!cast<Register>("V"#Index)> {
let SubRegIndices = [sub_vrm4_0, sub_vrm4_1];
+ let CostPerUse = [!if(!eq(Index, 0), 1, 0)];
}
}
diff --git a/llvm/test/CodeGen/RISCV/pr69586.ll b/llvm/test/CodeGen/RISCV/pr69586.ll
index fa447c8f4fec1..c1aaab54e3fe6 100644
--- a/llvm/test/CodeGen/RISCV/pr69586.ll
+++ b/llvm/test/CodeGen/RISCV/pr69586.ll
@@ -1031,7 +1031,13 @@ define void @test(ptr %0, ptr %1, i64 %2) {
; REMAT-NEXT: add a2, a0, t6
; REMAT-NEXT: vle32.v v18, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v20, v22
-; REMAT-NEXT: vle32.v v20, (a2)
+; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: csrr a2, vlenb
+; REMAT-NEXT: li a5, 10
+; REMAT-NEXT: mul a2, a2, a5
+; REMAT-NEXT: add a2, sp, a2
+; REMAT-NEXT: addi a2, a2, 432
+; REMAT-NEXT: vs2r.v v8, (a2) # vscale x 16-byte Folded Spill
; REMAT-NEXT: add a2, a0, s0
; REMAT-NEXT: vle32.v v22, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v24, v26
@@ -1074,11 +1080,17 @@ define void @test(ptr %0, ptr %1, i64 %2) {
; REMAT-NEXT: mul a5, a5, a6
; REMAT-NEXT: add a5, sp, a5
; REMAT-NEXT: addi a5, a5, 432
-; REMAT-NEXT: vl2r.v v0, (a5) # vscale x 16-byte Folded Reload
-; REMAT-NEXT: sf.vc.vv 3, 0, v0, v18
+; REMAT-NEXT: vl2r.v v20, (a5) # vscale x 16-byte Folded Reload
+; REMAT-NEXT: sf.vc.vv 3, 0, v20, v18
; REMAT-NEXT: vle32.v v0, (a2)
; REMAT-NEXT: add a2, a0, s7
; REMAT-NEXT: vle32.v v18, (a2)
+; REMAT-NEXT: csrr a5, vlenb
+; REMAT-NEXT: li a6, 10
+; REMAT-NEXT: mul a5, a5, a6
+; REMAT-NEXT: add a5, sp, a5
+; REMAT-NEXT: addi a5, a5, 432
+; REMAT-NEXT: vl2r.v v20, (a5) # vscale x 16-byte Folded Reload
; REMAT-NEXT: sf.vc.vv 3, 0, v20, v22
; REMAT-NEXT: vle32.v v22, (a2)
; REMAT-NEXT: add a2, a0, s8
@@ -1293,9 +1305,9 @@ define void @test(ptr %0, ptr %1, i64 %2) {
; REMAT-NEXT: vle32.v v2, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v20, v0
; REMAT-NEXT: vle32.v v20, (a2)
-; REMAT-NEXT: li s7, 21
-; REMAT-NEXT: slli s7, s7, 10
-; REMAT-NEXT: add a2, a0, s7
+; REMAT-NEXT: li s5, 21
+; REMAT-NEXT: slli s5, s5, 10
+; REMAT-NEXT: add a2, a0, s5
; REMAT-NEXT: vle32.v v0, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v22, v24
; REMAT-NEXT: vle32.v v22, (a2)
@@ -1493,14 +1505,14 @@ define void @test(ptr %0, ptr %1, i64 %2) {
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: sd a0, 312(sp) # 8-byte Folded Spill
-; REMAT-NEXT: add s5, a1, s5
-; REMAT-NEXT: sd s5, 304(sp) # 8-byte Folded Spill
+; REMAT-NEXT: li a0, 9
+; REMAT-NEXT: slli a0, a0, 10
+; REMAT-NEXT: add a0, a1, a0
+; REMAT-NEXT: sd a0, 304(sp) # 8-byte Folded Spill
; REMAT-NEXT: add s6, a1, s6
; REMAT-NEXT: sd s6, 296(sp) # 8-byte Folded Spill
-; REMAT-NEXT: li a0, 5
-; REMAT-NEXT: slli a0, a0, 11
-; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sd a0, 288(sp) # 8-byte Folded Spill
+; REMAT-NEXT: add s7, a1, s7
+; REMAT-NEXT: sd s7, 288(sp) # 8-byte Folded Spill
; REMAT-NEXT: add s8, a1, s8
; REMAT-NEXT: sd s8, 280(sp) # 8-byte Folded Spill
; REMAT-NEXT: add s9, a1, s9
@@ -1571,8 +1583,8 @@ define void @test(ptr %0, ptr %1, i64 %2) {
; REMAT-NEXT: addi a0, a0, 512
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: sd a0, 120(sp) # 8-byte Folded Spill
-; REMAT-NEXT: add s7, a1, s7
-; REMAT-NEXT: sd s7, 112(sp) # 8-byte Folded Spill
+; REMAT-NEXT: add s5, a1, s5
+; REMAT-NEXT: sd s5, 112(sp) # 8-byte Folded Spill
; REMAT-NEXT: add s4, a1, s4
; REMAT-NEXT: sd s4, 104(sp) # 8-byte Folded Spill
; REMAT-NEXT: li a0, 11
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
index 39732602cc85e..4e58fe11c57f0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
@@ -70,7 +70,7 @@ define fastcc <vscale x 64 x i32> @ret_split_nxv64i32(ptr %x) {
; CHECK-LABEL: ret_split_nxv64i32:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: vl8re32.v v8, (a1)
+; CHECK-NEXT: vl8re32.v v0, (a1)
; CHECK-NEXT: slli a3, a2, 3
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: add a4, a2, a3
@@ -81,12 +81,12 @@ define fastcc <vscale x 64 x i32> @ret_split_nxv64i32(ptr %x) {
; CHECK-NEXT: add a3, a0, a3
; CHECK-NEXT: add a1, a1, a4
; CHECK-NEXT: vl8re32.v v24, (a5)
-; CHECK-NEXT: vl8re32.v v0, (a1)
-; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vl8re32.v v8, (a1)
+; CHECK-NEXT: vs8r.v v0, (a0)
; CHECK-NEXT: vs8r.v v16, (a2)
; CHECK-NEXT: vs8r.v v24, (a3)
; CHECK-NEXT: add a0, a0, a4
-; CHECK-NEXT: vs8r.v v0, (a0)
+; CHECK-NEXT: vs8r.v v8, (a0)
; CHECK-NEXT: ret
%v = load <vscale x 64 x i32>, ptr %x
ret <vscale x 64 x i32> %v
@@ -264,28 +264,28 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_param_nxv32i32_nxv32i32_nxv32i32
; CHECK-NEXT: vl8re32.v v8, (a2)
; CHECK-NEXT: addi a3, sp, 16
; CHECK-NEXT: vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
-; CHECK-NEXT: vl8re32.v v0, (a0)
+; CHECK-NEXT: vl8re32.v v24, (a0)
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a2, a2, a1
; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vl8re32.v v8, (a0)
+; CHECK-NEXT: vl8re32.v v0, (a0)
; CHECK-NEXT: vl8re32.v v16, (a2)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT: vadd.vv v0, v24, v0
+; CHECK-NEXT: vadd.vv v24, v8, v24
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vadd.vv v24, v0, v24
+; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vadd.vv v24, v24, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vadd.vv v8, v0, v8
+; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vadd.vv v8, v8, v0
; CHECK-NEXT: vadd.vv v8, v8, v16
; CHECK-NEXT: vadd.vx v16, v8, a4
; CHECK-NEXT: vadd.vx v8, v24, a4
@@ -326,8 +326,8 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a3, a0, a1
-; RV32-NEXT: vl8re32.v v24, (a3)
-; RV32-NEXT: vl8re32.v v0, (a0)
+; RV32-NEXT: vl8re32.v v0, (a3)
+; RV32-NEXT: vl8re32.v v24, (a0)
; RV32-NEXT: addi a3, sp, 128
; RV32-NEXT: addi a0, sp, 128
; RV32-NEXT: vs8r.v v8, (a3)
@@ -335,8 +335,8 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
; RV32-NEXT: li a3, 2
; RV32-NEXT: vs8r.v v16, (a1)
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv8r.v v8, v0
-; RV32-NEXT: vmv8r.v v16, v24
+; RV32-NEXT: vmv8r.v v8, v24
+; RV32-NEXT: vmv8r.v v16, v0
; RV32-NEXT: call ext2
; RV32-NEXT: addi sp, s0, -144
; RV32-NEXT: .cfi_def_cfa sp, 144
@@ -365,8 +365,8 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a3, a0, a1
-; RV64-NEXT: vl8re32.v v24, (a3)
-; RV64-NEXT: vl8re32.v v0, (a0)
+; RV64-NEXT: vl8re32.v v0, (a3)
+; RV64-NEXT: vl8re32.v v24, (a0)
; RV64-NEXT: addi a3, sp, 128
; RV64-NEXT: addi a0, sp, 128
; RV64-NEXT: vs8r.v v8, (a3)
@@ -374,8 +374,8 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
; RV64-NEXT: li a3, 2
; RV64-NEXT: vs8r.v v16, (a1)
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv8r.v v8, v0
-; RV64-NEXT: vmv8r.v v16, v24
+; RV64-NEXT: vmv8r.v v8, v24
+; RV64-NEXT: vmv8r.v v16, v0
; RV64-NEXT: call ext2
; RV64-NEXT: addi sp, s0, -144
; RV64-NEXT: .cfi_def_cfa sp, 144
@@ -406,8 +406,8 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: andi sp, sp, -128
-; RV32-NEXT: addi a1, sp, 128
-; RV32-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vmv8r.v v0, v16
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: vl8re32.v v16, (a2)
; RV32-NEXT: csrr a3, vlenb
@@ -418,7 +418,9 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a2, a2, a1
; RV32-NEXT: add a3, a0, a1
-; RV32-NEXT: vl8re32.v v0, (a2)
+; RV32-NEXT: vl8re32.v v16, (a2)
+; RV32-NEXT: addi a2, sp, 128
+; RV32-NEXT: vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
; RV32-NEXT: vl8re32.v v24, (a3)
; RV32-NEXT: vl8re32.v v16, (a0)
; RV32-NEXT: csrr a0, vlenb
@@ -432,9 +434,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV32-NEXT: addi a3, a3, 128
; RV32-NEXT: vs8r.v v16, (a3)
; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: addi a2, sp, 128
-; RV32-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; RV32-NEXT: vs8r.v v8, (a0)
+; RV32-NEXT: vs8r.v v0, (a0)
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
@@ -451,8 +451,8 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 128
; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv8r.v v16, v0
+; RV32-NEXT: addi a1, sp, 128
+; RV32-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: call ext3
; RV32-NEXT: addi sp, s0, -144
; RV32-NEXT: .cfi_def_cfa sp, 144
@@ -479,8 +479,8 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: andi sp, sp, -128
-; RV64-NEXT: addi a1, sp, 128
-; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vmv8r.v v0, v16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: vl8re32.v v16, (a2)
; RV64-NEXT: csrr a3, vlenb
@@ -491,7 +491,9 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a2, a2, a1
; RV64-NEXT: add a3, a0, a1
-; RV64-NEXT: vl8re32.v v0, (a2)
+; RV64-NEXT: vl8re32.v v16, (a2)
+; RV64-NEXT: addi a2, sp, 128
+; RV64-NEXT: vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
; RV64-NEXT: vl8re32.v v24, (a3)
; RV64-NEXT: vl8re32.v v16, (a0)
; RV64-NEXT: csrr a0, vlenb
@@ -505,9 +507,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV64-NEXT: addi a3, a3, 128
; RV64-NEXT: vs8r.v v16, (a3)
; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: addi a2, sp, 128
-; RV64-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; RV64-NEXT: vs8r.v v8, (a0)
+; RV64-NEXT: vs8r.v v0, (a0)
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 5
; RV64-NEXT: add a0, sp, a0
@@ -524,8 +524,8 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 128
; RV64-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
-; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT: vmv8r.v v16, v0
+; RV64-NEXT: addi a1, sp, 128
+; RV64-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
; RV64-NEXT: call ext3
; RV64-NEXT: addi sp, s0, -144
; RV64-NEXT: .cfi_def_cfa sp, 144
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
index 96252f070a580..961857968a2f3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
@@ -95,9 +95,12 @@ define i32 @test_v256i1(<256 x i1> %x) {
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v6, v0
@@ -110,7 +113,7 @@ define i32 @test_v256i1(<256 x i1> %x) {
; CHECK-NEXT: vmv.v.i v16, 0
; CHECK-NEXT: vmerge.vim v8, v16, 1, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
@@ -118,7 +121,10 @@ define i32 @test_v256i1(<256 x i1> %x) {
; CHECK-NEXT: vslidedown.vi v0, v4, 4
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vmerge.vim v8, v16, 1, v0
-; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v7, 4
@@ -127,...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/174372
More information about the llvm-commits
mailing list