[llvm] [RISCV] Sources of vmerge shouldn't overlap V0 (PR #170070)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 30 22:10:45 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-globalisel
Author: Pengcheng Wang (wangpc-pp)
Changes
According to the spec:
> A vector register cannot be used to provide source operands with more
> than one EEW for a single instruction. A mask register source is
> considered to have EEW=1 for this constraint.
The `vmerge` variants always take their mask in `V0`, so their vector sources should use register classes that exclude `V0`.
This fixes #169905.
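To make the constraint concrete, here is an illustrative sketch (not taken from the patch's tests): with the mask fixed in `v0` at EEW=1, allocating a data source to `v0` would read the same register at two different EEWs.

```asm
vsetvli t0, zero, e32, m1, ta, ma
# Invalid under the quoted rule: v0 is read both as a data source (EEW=32)
# and as the mask (EEW=1) by the same instruction.
vmerge.vvm v8, v0, v9, v0
# Fine: the data sources stay clear of v0.
vmerge.vvm v8, v10, v9, v0
```

The patch enforces this at the pseudo level by switching the source operands of the `vmerge` pseudos from `m.vrclass` to `GetVRegNoV0<m.vrclass>.R`, so the register allocator can never assign `v0` to a data source.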
---
Patch is 303.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/170070.diff
33 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td (+15-15)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir (+22-22)
- (modified) llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll (+164-82)
- (modified) llvm/test/CodeGen/RISCV/rvv/copyprop.mir (+1-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll (+27-28)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll (+411-411)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll (+286-148)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll (+25-14)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll (+26-53)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll (+25-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll (+82-179)
- (modified) llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll (+82-179)
- (modified) llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir (+2-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/pr88576.ll (+1-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir (+18-18)
- (modified) llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll (+11-9)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-splice.ll (+12-12)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmacc-vp.ll (+87-57)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll (+87-57)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll (+87-57)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll (+87-57)
- (modified) llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir (+18-18)
- (modified) llvm/test/CodeGen/RISCV/rvv/vl-optimizer-subreg-assert.mir (+4-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll (+96-62)
- (modified) llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll (+96-53)
- (modified) llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir (+7-7)
- (modified) llvm/test/CodeGen/RISCV/rvv/vmerge.ll (+3-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/vmv.s.x.ll (+5-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir (+1-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll (+96-62)
- (modified) llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll (+4-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll (+25-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vselect-int.ll (+3-2)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index eb3c9b0defccb..e36204c536c0d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -2982,21 +2982,21 @@ multiclass VPseudoVFWALU_WV_WF_RM {
multiclass VPseudoVMRG_VM_XM_IM {
foreach m = MxList in {
defvar mx = m.MX;
- def "_VVM" # "_" # m.MX:
- VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
- m.vrclass, m.vrclass, m>,
- SchedBinary<"WriteVIMergeV", "ReadVIMergeV", "ReadVIMergeV", mx,
- forcePassthruRead=true>;
- def "_VXM" # "_" # m.MX:
- VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
- m.vrclass, GPR, m>,
- SchedBinary<"WriteVIMergeX", "ReadVIMergeV", "ReadVIMergeX", mx,
- forcePassthruRead=true>;
- def "_VIM" # "_" # m.MX:
- VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
- m.vrclass, simm5, m>,
- SchedUnary<"WriteVIMergeI", "ReadVIMergeV", mx,
- forcePassthruRead=true>;
+ def "_VVM"#"_"#m.MX : VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
+ GetVRegNoV0<m.vrclass>.R,
+ GetVRegNoV0<m.vrclass>.R, m>,
+ SchedBinary<"WriteVIMergeV", "ReadVIMergeV", "ReadVIMergeV", mx,
+ forcePassthruRead = true>;
+ def "_VXM"#"_"#m.MX
+ : VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
+ GetVRegNoV0<m.vrclass>.R, GPR, m>,
+ SchedBinary<"WriteVIMergeX", "ReadVIMergeV", "ReadVIMergeX", mx,
+ forcePassthruRead = true>;
+ def "_VIM"#"_"#m.MX
+ : VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
+ GetVRegNoV0<m.vrclass>.R, simm5, m>,
+ SchedUnary<"WriteVIMergeI", "ReadVIMergeV", mx,
+ forcePassthruRead = true>;
}
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir
index f8061462c6220..ada76a43639d7 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir
@@ -11,7 +11,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv1i8
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */
; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]]
@@ -19,7 +19,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv1i8
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */
; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]]
@@ -40,7 +40,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv4i8
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */
; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]]
@@ -48,7 +48,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv4i8
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */
; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]]
@@ -69,7 +69,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv16i8
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */
; RV32I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]]
@@ -77,7 +77,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv16i8
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */
; RV64I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]]
@@ -98,7 +98,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv64i8
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */
; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]]
@@ -106,7 +106,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv64i8
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */
; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]]
@@ -127,7 +127,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv2i16
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */
; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]]
@@ -135,7 +135,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv2i16
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */
; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]]
@@ -156,7 +156,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv8i16
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */
; RV32I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]]
@@ -164,7 +164,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv8i16
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */
; RV64I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]]
@@ -185,7 +185,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv32i16
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_MF2_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */
; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF2_]]
@@ -193,7 +193,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv32i16
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_MF2_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */
; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF2_]]
@@ -214,7 +214,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv2i32
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */
; RV32I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]]
@@ -222,7 +222,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv2i32
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */
; RV64I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]]
@@ -243,7 +243,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv8i32
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */
; RV32I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]]
@@ -251,7 +251,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv8i32
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */
; RV64I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]]
@@ -272,7 +272,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv1i64
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 6 /* e64 */
; RV32I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]]
@@ -280,7 +280,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv1i64
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 6 /* e64 */
; RV64I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]]
@@ -301,7 +301,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv4i64
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 6 /* e64 */
; RV32I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]]
@@ -309,7 +309,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv4i64
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 6 /* e64 */
; RV64I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]]
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
index 2d4fce68f9545..27b53befbf4a7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
@@ -288,54 +288,89 @@ define i32 @test_nxv128i1(<vscale x 128 x i1> %x) {
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v6, v0
; CHECK-NEXT: vmv.v.i v16, 0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vmerge.vim v8, v16, 1, v0
-; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
; CHECK-NEXT: srli a1, a0, 1
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v0, a1
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmerge.vim v24, v16, 1, v0
+; CHECK-NEXT: vmerge.vim v8, v16, 1, v0
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v0, a0
; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmerge.vim v16, v16, 1, v0
+; CHECK-NEXT: vmerge.vim v8, v16, 1, v0
; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v6, a0
+; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmerge.vim v16, v16, 1, v0
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v7, a0
+; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; CHECK-NEXT: vadd.vi v24, v16, 1
+; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v6, v7, a1
-; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v7, a0
-; CHECK-NEXT: vslidedown.vx v5, v6, a0
-; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
-; CHECK-NEXT: vmv1r.v v0, v5
-; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vx v0, v6, a0
+; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT: vadd.vi v24, v8, 1
+; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0
+; CHECK-NEXT: vadd.vv v8, v16, v8
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vadd.vi v16, v8, 1
; CHECK-NEXT: vmv1r.v v0, v6
-; CHECK-NEXT: vadd.vi v24, v24, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vadd.vi v24, v24, 1
; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0
+; CHECK-NEXT: vadd.vv v16, v24, v16
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t
-; CHECK-NEXT: vadd.vv v16, v16, v24
+; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: vadd.vv v8, v16, v8
; CHECK-NEXT: vmv.s.x v16, zero
; CHECK-NEXT: vredsum.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: add sp, sp, a1
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
@@ -353,12 +388,14 @@ define i32 @test_nxv256i1(<vscale x 256 x i1> %x) {
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v6, v10
; CHECK-NEXT: vmv1r.v v7, v9
@@ -368,9 +405,9 @@ define i32 @test_nxv256i1(<vscale x 256 x i1> %x) {
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: vmerge.vim v8, v16, 1, v0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: mv a2, a0
-; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
@@ -378,7 +415,10 @@ define i32 @test_nxv256i1(<vscale x 256 x i1> %x) {
; CHECK-NEXT: vmv1r.v v0, v5
; CHECK-NEXT: vmerge.vim v8, v16, 1, v0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
@@ -391,127 +431,169 @@ define i32 @test_nxv256i1(<vscale x 256 x i1> %x) {
; CHECK-NEXT: vsetvli...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/170070
More information about the llvm-commits mailing list