[llvm] [RISCV] Disable performCombineVMergeAndVOps for PseduoVIOTA_M. (PR #71483)

Mon Nov 6 20:10:02 PST 2023

llvmbot wrote:




@llvm/pr-subscribers-backend-risc-v

Author: Yeting Kuo (yetingk)

<details>
<summary>Changes</summary>

This transformation is illegal for PseduoVIOTA_M. The value of `viota.m vd, vs2` is the prefix sum of vd2 and adding mask for it may cause wrong prefix sum.

Take an example, the result of following expression is `{5, 5, 5, 3}`,
```
; v4 = {1, 1, 1, 1}
viota.m v1, v4
; v0 = {0, 0, 0, 1}, v1 = {0, 1, 2, 3}, v8 = {5, 5, 5, 5}
vmerge.vvm v8, v8, v1, v0.t
; v8 = {5, 5, 5, 3}
```
but if we merge them to `viota.m v8, v4, v0.t`, then the result of is `{5, 5, 5, 0}`

We still does the transformation when mask of vmerge.vvm is a true mask.

---
Full diff: https://github.com/llvm/llvm-project/pull/71483.diff


3 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp (+13) 
- (modified) llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll (+16) 
- (modified) llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll (+23-5) 


``````````diff

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 51a235bf2ca1861..f103d323648d16a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3501,6 +3501,19 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
   if (!True.isMachineOpcode())
     return false;
 
+  // This transformation is illegal for viota.m when Mask is not a true mask.
+  switch (True->getMachineOpcode()) {
+  case RISCV::PseudoVIOTA_M_MF8:
+  case RISCV::PseudoVIOTA_M_MF4:
+  case RISCV::PseudoVIOTA_M_MF2:
+  case RISCV::PseudoVIOTA_M_M1:
+  case RISCV::PseudoVIOTA_M_M2:
+  case RISCV::PseudoVIOTA_M_M4:
+  case RISCV::PseudoVIOTA_M_M8:
+    if (Mask && !usesAllOnesMask(Mask, Glue))
+      return false;
+  }
+
   unsigned TrueOpc = True.getMachineOpcode();
   const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
   uint64_t TrueTSFlags = TrueMCID.TSFlags;
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll
index 7e137d6a6196921..f6d9d1e711e7169 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll
@@ -258,3 +258,19 @@ entry:
   %res = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %i, <vscale x 2 x i32> %passthru, i32 %evl)
   ret <vscale x 2 x i32> %res
 }
+
+; Test VIOTA_M
+declare <vscale x 2 x i32> @llvm.riscv.viota.mask.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>,  <vscale x 2 x i1>, i64, i64)
+define <vscale x 2 x i32> @vpmerge_viota(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_viota:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    viota.m v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %a = call <vscale x 2 x i32> @llvm.riscv.viota.mask.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, <vscale x 2 x i1> %m, i64 %1, i64 0)
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 %1)
+  ret <vscale x 2 x i32> %b
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index 1ea2b7ef57cf081..df119435611d167 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -279,13 +279,15 @@ define <vscale x 2 x i32> @vpmerge_vid(<vscale x 2 x i32> %passthru, <vscale x 2
   ret <vscale x 2 x i32> %b
 }
 
-; Test riscv.viota
+; Test not combine VIOTA_M and VMERGE_VVM without true mask.
 declare <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i64)
 define <vscale x 2 x i32> @vpmerge_viota(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
 ; CHECK-LABEL: vpmerge_viota:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    viota.m v8, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    viota.m v10, v9
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
 ; CHECK-NEXT:    ret
   %1 = zext i32 %vl to i64
   %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1)
@@ -293,6 +295,21 @@ define <vscale x 2 x i32> @vpmerge_viota(<vscale x 2 x i32> %passthru, <vscale x
   ret <vscale x 2 x i32> %b
 }
 
+; Test combine VIOTA_M and VMERGE_VVM with true mask.
+define <vscale x 2 x i32> @vpmerge_viota2(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_viota2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT:    viota.m v8, v0
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1)
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %true = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %true, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
 ; Test riscv.vfclass
 declare <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x float>, i64)
 define <vscale x 2 x i32> @vpmerge_vflcass(<vscale x 2 x i32> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
@@ -730,8 +747,9 @@ define <vscale x 2 x i32> @vpselect_vid(<vscale x 2 x i32> %passthru, <vscale x
 define <vscale x 2 x i32> @vpselect_viota(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
 ; CHECK-LABEL: vpselect_viota:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    viota.m v8, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    viota.m v10, v9
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
 ; CHECK-NEXT:    ret
   %1 = zext i32 %vl to i64
   %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1)

``````````

</details>


https://github.com/llvm/llvm-project/pull/71483