[llvm] 66dc29a - [RISCV] Add tests for merges with differing VLs that could be folded
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 19 09:24:48 PDT 2023
Author: Luke Lau
Date: 2023-07-19T17:24:38+01:00
New Revision: 66dc29a82a7c9a5827caea98847be7547e451431
URL: https://github.com/llvm/llvm-project/commit/66dc29a82a7c9a5827caea98847be7547e451431
DIFF: https://github.com/llvm/llvm-project/commit/66dc29a82a7c9a5827caea98847be7547e451431.diff
LOG: [RISCV] Add tests for merges with differing VLs that could be folded
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D155069
Added:
Modified:
llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll
llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll
index 2e25459100e966..2eaeefb1aa41ca 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll
@@ -148,3 +148,105 @@ define <vscale x 2 x i32> @vpmerge_vslide1down(<vscale x 2 x i32> %passthru, <vs
%b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 %vl)
ret <vscale x 2 x i32> %b
}
+
+; Tests for folding vmerge into its ops when their VLs differ
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_smaller_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_smaller_vl_same_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, mu
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vadd.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v11
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 3, i64 0)
+ %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 2)
+ ret <vscale x 2 x i32> %b
+}
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_larger_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_larger_vl_same_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vadd.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v11
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 2, i64 0)
+ %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 3)
+ ret <vscale x 2 x i32> %b
+}
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_smaller_vl_different_passthru(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_smaller_vl_different_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, mu
+; CHECK-NEXT: vadd.vv v8, v10, v11, v0.t
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 3, i64 0)
+ %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt2, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 2)
+ ret <vscale x 2 x i32> %b
+}
+
+; Can't fold this because we need to take elements from both %pt1 and %pt2
+define <vscale x 2 x i32> @vmerge_larger_vl_different_passthru(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_larger_vl_different_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT: vadd.vv v8, v10, v11, v0.t
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 2, i64 0)
+ %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt2, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 3)
+ ret <vscale x 2 x i32> %b
+}
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_smaller_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_smaller_vl_poison_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT: vadd.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 3, i64 0)
+ %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 2)
+ ret <vscale x 2 x i32> %b
+}
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_larger_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_larger_vl_poison_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; CHECK-NEXT: vadd.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 2, i64 0)
+ %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 3)
+ ret <vscale x 2 x i32> %b
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index 5076227a8a50fc..bf70f42c352b87 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -981,3 +981,97 @@ declare <vscale x 32 x i16> @llvm.riscv.vmerge.nxv32i16.nxv32i16.i64(<vscale x 3
declare void @llvm.riscv.vse.nxv32i16.i64(<vscale x 32 x i16>, <vscale x 32 x i16>* nocapture, i64)
declare <vscale x 1 x i16> @llvm.riscv.vaaddu.nxv1i16.i16.i64(<vscale x 1 x i16>, <vscale x 1 x i16>, i16, i64 immarg, i64)
declare <vscale x 1 x i16> @llvm.riscv.vmerge.nxv1i16.nxv1i16.i64(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i64)
+
+; Tests for folding vmerge into its ops when their VLs differ
+
+declare <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, i64)
+declare <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i64)
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_smaller_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_smaller_vl_same_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vadd.vv v11, v9, v10
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 4)
+ %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2)
+ ret <vscale x 2 x i32> %b
+}
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_larger_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_larger_vl_same_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vadd.vv v11, v9, v10
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
+ %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
+ ret <vscale x 2 x i32> %b
+}
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_smaller_vl_different_passthru(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_smaller_vl_different_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT: vadd.vv v8, v10, v11
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 3)
+ %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt2, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2)
+ ret <vscale x 2 x i32> %b
+}
+
+; Can't fold this because we need to take elements from both %pt1 and %pt2
+define <vscale x 2 x i32> @vmerge_larger_vl_different_passthru(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_larger_vl_different_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT: vadd.vv v8, v10, v11
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
+ %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt2, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
+ ret <vscale x 2 x i32> %b
+}
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_smaller_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_smaller_vl_poison_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT: vadd.vv v9, v9, v10
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 3)
+ %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2)
+ ret <vscale x 2 x i32> %b
+}
+
+; Can fold with VL=2
+define <vscale x 2 x i32> @vmerge_larger_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: vmerge_larger_vl_poison_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; CHECK-NEXT: vadd.vv v9, v9, v10
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
+ %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
+ ret <vscale x 2 x i32> %b
+}
+
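For context, the vmerge peephole these tests exercise rewrites a vmerge whose true operand is produced by another vector op into a masked form of that op, and the new tests probe what happens when the two instructions carry different VLs. Below is a minimal sketch of the unmasked flavour, using only intrinsics already declared in the test files and hypothetical %vl values; it is an illustration, not code taken from the patch:

; Before (sketch): an unmasked vadd at one VL feeding a vmerge at a different VL
%a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %vl1)
%b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 %vl2)

; After (sketch): a single masked vadd under the shared passthru, which is only
; sound when one VL can cover both uses (the same-passthru and poison-passthru
; cases above), not when elements must come from two distinct passthrus
%b = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %vl, i64 0)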