[llvm] 1ebe16b - [RISCV] Add VL optimization related tests
Michael Maitland via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 17 11:16:48 PDT 2024
Author: Michael Maitland
Date: 2024-09-17T11:14:20-07:00
New Revision: 1ebe16bf43e990135bee8d439176c472e7514866
URL: https://github.com/llvm/llvm-project/commit/1ebe16bf43e990135bee8d439176c472e7514866
DIFF: https://github.com/llvm/llvm-project/commit/1ebe16bf43e990135bee8d439176c472e7514866.diff
LOG: [RISCV] Add VL optimization related tests
These tests are good candidates for VL optimization. This is a pre-commit for
PR #108640, but they could probably also be improved by the peephole VL
optimizations.
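For context, here is a minimal sketch of the kind of rewrite these tests are
candidates for (illustrative only, not taken from the patch; it assumes the
optimization may shrink a producer's VL to match its consumer's VL when the
producer's tail is not consumed and its tail policy is agnostic):

    ; Before: %v is computed at VLMAX (-1), but only %vl lanes are read by %w.
    %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
    %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl)

    ; After: the producer's VL can be reduced to %vl, since the tail of %v is
    ; never read and the tail policy is agnostic.
    %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl)
    %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl)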
Added:
llvm/test/CodeGen/RISCV/rvv/vl-opt-no-prop.ll
llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll
llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-no-prop.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-no-prop.ll
new file mode 100644
index 00000000000000..e1f641afd2cfe0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-no-prop.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
+
+declare <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, iXLen)
+declare <vscale x 4 x i32> @llvm.riscv.vrgather.vv.nxv4i32.iXLen(
+ <vscale x 4 x i32>,
+ <vscale x 4 x i32>,
+ <vscale x 4 x i32>,
+ iXLen)
+
+declare <vscale x 4 x i32> @llvm.riscv.vslidedown.nxv4i32(
+ <vscale x 4 x i32>,
+ <vscale x 4 x i32>,
+ iXLen,
+ iXLen,
+ iXLen);
+
+declare <vscale x 4 x i32> @llvm.riscv.vslide1down.nxv4i32.i32(
+ <vscale x 4 x i32>,
+ <vscale x 4 x i32>,
+ i32,
+ iXLen);
+
+define <vscale x 4 x i32> @vrgather(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
+; CHECK-LABEL: vrgather:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v12, v10, v12
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vrgather.vv v8, v12, v10
+; CHECK-NEXT: ret
+ %v = add <vscale x 4 x i32> %a, %b
+ %w = call <vscale x 4 x i32> @llvm.riscv.vrgather.vv.nxv4i32.iXLen(
+ <vscale x 4 x i32> poison,
+ <vscale x 4 x i32> %v,
+ <vscale x 4 x i32> %a,
+ iXLen %vl1)
+
+ ret <vscale x 4 x i32> %w
+}
+
+define <vscale x 4 x i32> @vslidedown(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, iXLen %2, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
+; CHECK-LABEL: vslidedown:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v10, v12, v14
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v10, a0
+; CHECK-NEXT: ret
+entry:
+ %v = add <vscale x 4 x i32> %a, %b
+ %w = call <vscale x 4 x i32> @llvm.riscv.vslidedown.nxv4i32(
+ <vscale x 4 x i32> %0,
+ <vscale x 4 x i32> %v,
+ iXLen %2,
+ iXLen %2,
+ iXLen 1)
+
+ ret <vscale x 4 x i32> %w
+}
+
+define <vscale x 4 x i32> @vslide1down(<vscale x 4 x i32> %0, i32 %1, iXLen %2, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
+; CHECK-LABEL: vslide1down:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v10, v12
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: ret
+entry:
+ %v = add <vscale x 4 x i32> %a, %b
+ %w = call <vscale x 4 x i32> @llvm.riscv.vslide1down.nxv4i32.i32(
+ <vscale x 4 x i32> poison,
+ <vscale x 4 x i32> %v,
+ i32 %1,
+ iXLen %2)
+
+ ret <vscale x 4 x i32> %w
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll
new file mode 100644
index 00000000000000..2b3c5417b15b55
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvl512b -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvl512b -verify-machineinstrs | FileCheck %s
+
+define <2 x i32> @vdot_lane_s32(<2 x i32> noundef %var_1, <8 x i8> noundef %var_3, <8 x i8> noundef %var_5, <8 x i16> %x) {
+; CHECK-LABEL: vdot_lane_s32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v11, 0
+; CHECK-NEXT: vnsrl.wi v9, v11, 16
+; CHECK-NEXT: vwadd.vv v10, v8, v9
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vnsrl.wx v9, v10, a0
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %b = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %c = sext <4 x i16> %a to <4 x i32>
+ %d = sext <4 x i16> %b to <4 x i32>
+ %e = add nsw <4 x i32> %c, %d
+ %z10 = shufflevector <4 x i32> %e, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
+ %z11 = shufflevector <4 x i32> %e, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+ %y12 = add <2 x i32> %z10, %z11
+ ret <2 x i32> %y12
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32.nxv2i16(
+ <vscale x 2 x i16>,
+ <vscale x 2 x i32>,
+ <vscale x 2 x i16>,
+ iXLen);
+
+define <vscale x 2 x i16> @intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, iXLen %2, <vscale x 2 x i32> %3, <vscale x 2 x i32> %4, <vscale x 2 x i16> %z) nounwind {
+; CHECK-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vwadd.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wv v8, v10, v12
+; CHECK-NEXT: ret
+entry:
+ %c = sext <vscale x 2 x i16> %a to <vscale x 2 x i32>
+ %d = sext <vscale x 2 x i16> %b to <vscale x 2 x i32>
+ %v1 = add <vscale x 2 x i32> %c, %d
+ %x = call <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32.nxv2i16(
+ <vscale x 2 x i16> undef,
+ <vscale x 2 x i32> %v1,
+ <vscale x 2 x i16> %z,
+ iXLen %2)
+
+ ret <vscale x 2 x i16> %x
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32.nxv2i16(
+ <vscale x 2 x i16>,
+ <vscale x 2 x i32>,
+ <vscale x 2 x i16>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i16> @vnclip(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, iXLen %2, <vscale x 2 x i32> %3, <vscale x 2 x i32> %4, <vscale x 2 x i16> %z) nounwind {
+; CHECK-LABEL: vnclip:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vwadd.vv v10, v8, v9
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vnclip.wv v8, v10, v12
+; CHECK-NEXT: ret
+entry:
+ %c = sext <vscale x 2 x i16> %a to <vscale x 2 x i32>
+ %d = sext <vscale x 2 x i16> %b to <vscale x 2 x i32>
+ %v1 = add <vscale x 2 x i32> %c, %d
+ %x = call <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32.nxv2i16(
+ <vscale x 2 x i16> undef,
+ <vscale x 2 x i32> %v1,
+ <vscale x 2 x i16> %z,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x i16> %x
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
new file mode 100644
index 00000000000000..b03ba076059503
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
+
+declare <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, iXLen)
+
+define <vscale x 4 x i32> @different_imm_vl_with_ta(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
+; CHECK-LABEL: different_imm_vl_with_ta:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v10, v12
+; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
+ %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4)
+ ret <vscale x 4 x i32> %w
+}
+
+; Not beneficial to propagate VL since VL is larger on the use side.
+define <vscale x 4 x i32> @different_imm_vl_with_ta_larger_vl(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
+; CHECK-LABEL: different_imm_vl_with_ta_larger_vl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v10, v12
+; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
+ %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 5)
+ ret <vscale x 4 x i32> %w
+}
+
+define <vscale x 4 x i32> @different_imm_reg_vl_with_ta(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
+; CHECK-LABEL: different_imm_reg_vl_with_ta:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v10, v12
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
+ %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl1)
+ ret <vscale x 4 x i32> %w
+}
+
+
+; Not beneficial to propagate VL since VL is already one.
+define <vscale x 4 x i32> @different_imm_vl_with_ta_1(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
+; CHECK-LABEL: different_imm_vl_with_ta_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v10, v12
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 1)
+ %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl1)
+ ret <vscale x 4 x i32> %w
+}
+
+; Propagate %vl2 to the last instruction since it may be smaller than %vl1.
+; This is still safe even if %vl2 is larger than %vl1, because the rest of the
+; vector is an undefined value.
+define <vscale x 4 x i32> @different_vl_with_ta(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
+; CHECK-LABEL: different_vl_with_ta:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v10, v8, v10
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
+ %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl2)
+ ret <vscale x 4 x i32> %w
+}
+
+; Test case to make sure VL won't propagate when using the tail-undisturbed policy.
+define <vscale x 4 x i32> @different_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
+; CHECK-LABEL: different_vl_with_tu:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv2r.v v14, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT: vadd.vv v14, v10, v12
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; CHECK-NEXT: vadd.vv v8, v14, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
+ %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl2)
+ ret <vscale x 4 x i32> %w
+}
+
+; Test case to make sure VL won't propagate when using the tail-undisturbed policy.
+define <vscale x 4 x i32> @different_imm_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
+; CHECK-LABEL: different_imm_vl_with_tu:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv2r.v v14, v10
+; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; CHECK-NEXT: vadd.vv v14, v10, v12
+; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; CHECK-NEXT: vadd.vv v8, v14, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
+ %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4)
+ ret <vscale x 4 x i32> %w
+}