[llvm] [RISCV][VLOPT] Add support for Vector Fixed-Point Arithmetic Instructions (PR #126483)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 10 00:31:39 PST 2025
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/126483
>From d29aa887aa34fb6c076a3c7b6dd5be2fc1e1f863 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 10 Feb 2025 16:15:16 +0800
Subject: [PATCH 1/2] Precommit tests
---
llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 532 +++++++++++++++++++
llvm/test/CodeGen/RISCV/rvv/vl-opt.mir | 20 +
2 files changed, 552 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 053f1209cf21464..eb3969ed9d19e6f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -2511,6 +2511,538 @@ define <vscale x 4 x i32> @vwmaccus_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x
ret <vscale x 4 x i32> %2
}
+define <vscale x 4 x i32> @vsaddu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vsaddu_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vsaddu.vv v10, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vsaddu_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vsaddu.vv v10, v8, v10
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vsaddu(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vsaddu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vsaddu_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vsaddu.vx v10, v8, a0
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vsaddu_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vsaddu.vx v10, v8, a0
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vsaddu(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vsaddu_vi(<vscale x 4 x i32> %a, iXLen %vl) {
+; NOVLOPT-LABEL: vsaddu_vi:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vsaddu.vi v10, v8, 5
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vsaddu_vi:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vsaddu.vi v10, v8, 5
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vsaddu(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 5, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vsadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vsadd_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vsadd.vv v10, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vsadd_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vsadd.vv v10, v8, v10
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vsadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vsadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vsadd_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vsadd.vx v10, v8, a0
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vsadd_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vsadd.vx v10, v8, a0
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vsadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vsadd_vi(<vscale x 4 x i32> %a, iXLen %vl) {
+; NOVLOPT-LABEL: vsadd_vi:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vsadd.vi v10, v8, 5
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vsadd_vi:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vsadd.vi v10, v8, 5
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vsadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 5, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vssubu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vssubu_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vssubu.vv v10, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vssubu_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vssubu.vv v10, v8, v10
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vssubu(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vssubu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vssubu_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vssubu.vx v10, v8, a0
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vssubu_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vssubu.vx v10, v8, a0
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vssubu(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vssub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vssub_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vssub.vv v10, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vssub_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vssub.vv v10, v8, v10
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vssub(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vssub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vssub_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vssub.vx v10, v8, a0
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vssub_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vssub.vx v10, v8, a0
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vssub(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vsmul_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vsmul_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: csrwi vxrm, 0
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vsmul.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vsmul_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: csrwi vxrm, 0
+; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vsmul.vv v8, v8, v10
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v8, v10
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vsmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vsmul_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vsmul_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: csrwi vxrm, 0
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vsmul.vx v10, v8, a0
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vsmul_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: csrwi vxrm, 0
+; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vsmul.vx v10, v8, a0
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vsmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen 0, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vssrl_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vssrl_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: csrwi vxrm, 0
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vssrl.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vssrl_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: csrwi vxrm, 0
+; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vssrl.vv v8, v8, v10
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v8, v10
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vssrl.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vssrl_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen %vl) {
+; NOVLOPT-LABEL: vssrl_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: csrwi vxrm, 0
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vssrl.vx v10, v8, a0
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vssrl_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: csrwi vxrm, 0
+; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vssrl.vx v10, v8, a0
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vssrl.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %b, iXLen 0, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vssrl_vi(<vscale x 4 x i32> %a, iXLen %vl) {
+; NOVLOPT-LABEL: vssrl_vi:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: csrwi vxrm, 0
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vssrl.vi v10, v8, 5
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vssrl_vi:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: csrwi vxrm, 0
+; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vssrl.vi v10, v8, 5
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vssrl.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 5, iXLen 0, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vssra_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vssra_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: csrwi vxrm, 0
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vssra.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vssra_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: csrwi vxrm, 0
+; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vssra.vv v8, v8, v10
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v8, v10
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vssra.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vssra_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen %vl) {
+; NOVLOPT-LABEL: vssra_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: csrwi vxrm, 0
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vssra.vx v10, v8, a0
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vssra_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: csrwi vxrm, 0
+; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vssra.vx v10, v8, a0
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vssra.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %b, iXLen 0, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vssra_vi(<vscale x 4 x i32> %a, iXLen %vl) {
+; NOVLOPT-LABEL: vssra_vi:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: csrwi vxrm, 0
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vssra.vi v10, v8, 5
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vssra_vi:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: csrwi vxrm, 0
+; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vssra.vi v10, v8, 5
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vssra.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 5, iXLen 0, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnclipu_vv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnclipu_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: csrwi vxrm, 0
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vnclipu.wv v14, v8, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v14, v14
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnclipu_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: csrwi vxrm, 0
+; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vnclipu.wv v14, v8, v12
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v14, v14
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vnclipu(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnclipu_vx(<vscale x 4 x i64> %a, iXLen %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnclipu_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: csrwi vxrm, 0
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vnclipu.wx v12, v8, a0
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v12, v12
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnclipu_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: csrwi vxrm, 0
+; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vnclipu.wx v12, v8, a0
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v12, v12
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vnclipu(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, iXLen %b, iXLen 0, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnclipu_vi(<vscale x 4 x i64> %a, iXLen %vl) {
+; NOVLOPT-LABEL: vnclipu_vi:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: csrwi vxrm, 0
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vnclipu.wi v12, v8, 5
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v12, v12
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnclipu_vi:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: csrwi vxrm, 0
+; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vnclipu.wi v12, v8, 5
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v12, v12
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vnclipu(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, iXLen 5, iXLen 0, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnclip_vv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnclip_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: csrwi vxrm, 0
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vnclip.wv v14, v8, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v14, v14
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnclip_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: csrwi vxrm, 0
+; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vnclip.wv v14, v8, v12
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v14, v14
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vnclip(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnclip_vx(<vscale x 4 x i64> %a, iXLen %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnclip_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: csrwi vxrm, 0
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vnclip.wx v12, v8, a0
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v12, v12
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnclip_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: csrwi vxrm, 0
+; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vnclip.wx v12, v8, a0
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v12, v12
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vnclip(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, iXLen %b, iXLen 0, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnclip_vi(<vscale x 4 x i64> %a, iXLen %vl) {
+; NOVLOPT-LABEL: vnclip_vi:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: csrwi vxrm, 0
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vnclip.wi v12, v8, 5
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v12, v12
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnclip_vi:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: csrwi vxrm, 0
+; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vnclip.wi v12, v8, 5
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v12, v12
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vnclip(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, iXLen 5, iXLen 0, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
define <vscale x 4 x i32> @vmv_v_i(<vscale x 4 x i32> %a, i32 %x, iXLen %vl) {
; NOVLOPT-LABEL: vmv_v_i:
; NOVLOPT: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index 78054c73d848f78..135f35305969028 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -270,3 +270,23 @@ body: |
%a:vr = PseudoVADD_VV_M1 %z, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
%b:vr = PseudoVADD_VV_M1 $noreg, %a, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */
...
+---
+name: vxsat_dead
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: vxsat_dead
+ ; CHECK: %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vxsat
+ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+ %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vxsat
+ %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+...
+---
+name: vxsat_not_dead
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: vxsat_not_dead
+ ; CHECK: %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def $vxsat
+ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+ %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def $vxsat
+ %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+...
>From 0c978c0db80a0f5c2a7541e5a3bbf85ae444ed8b Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 10 Feb 2025 16:23:34 +0800
Subject: [PATCH 2/2] [RISCV][VLOPT] Add support for Vector Fixed-Point
Arithmetic Instructions
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 38 ++++++++++-
llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 72 +++++++-------------
llvm/test/CodeGen/RISCV/rvv/vl-opt.mir | 2 +-
3 files changed, 62 insertions(+), 50 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index d4829bced247091..2d0e2a423bc4816 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -976,6 +976,17 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VMV_V_I:
case RISCV::VMV_V_X:
case RISCV::VMV_V_V:
+ // Vector Single-Width Saturating Add and Subtract
+ case RISCV::VSADDU_VV:
+ case RISCV::VSADDU_VX:
+ case RISCV::VSADDU_VI:
+ case RISCV::VSADD_VV:
+ case RISCV::VSADD_VX:
+ case RISCV::VSADD_VI:
+ case RISCV::VSSUBU_VV:
+ case RISCV::VSSUBU_VX:
+ case RISCV::VSSUB_VV:
+ case RISCV::VSSUB_VX:
// Vector Single-Width Averaging Add and Subtract
case RISCV::VAADDU_VV:
case RISCV::VAADDU_VX:
@@ -985,6 +996,23 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VASUBU_VX:
case RISCV::VASUB_VV:
case RISCV::VASUB_VX:
+ // Vector Single-Width Fractional Multiply with Rounding and Saturation
+ case RISCV::VSMUL_VV:
+ case RISCV::VSMUL_VX:
+ // Vector Single-Width Scaling Shift Instructions
+ case RISCV::VSSRL_VV:
+ case RISCV::VSSRL_VX:
+ case RISCV::VSSRL_VI:
+ case RISCV::VSSRA_VV:
+ case RISCV::VSSRA_VX:
+ case RISCV::VSSRA_VI:
+ // Vector Narrowing Fixed-Point Clip Instructions
+ case RISCV::VNCLIPU_WV:
+ case RISCV::VNCLIPU_WX:
+ case RISCV::VNCLIPU_WI:
+ case RISCV::VNCLIP_WV:
+ case RISCV::VNCLIP_WX:
+ case RISCV::VNCLIP_WI:
// Vector Crypto
case RISCV::VWSLL_VI:
@@ -1173,8 +1201,16 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
const MCInstrDesc &Desc = MI.getDesc();
if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags))
return false;
- if (MI.getNumDefs() != 1)
+
+ if (MI.getNumExplicitDefs() != 1)
+ return false;
+
+ // Some instructions have implicit defs e.g. $vxsat. If they might be read
+ // later then we can't reduce VL.
+ if (!MI.allImplicitDefsAreDead()) {
+ LLVM_DEBUG(dbgs() << "Not a candidate because has non-dead implicit def\n");
return false;
+ }
if (MI.mayRaiseFPException()) {
LLVM_DEBUG(dbgs() << "Not a candidate because may raise FP exception\n");
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index eb3969ed9d19e6f..2411e5359b40347 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -2522,9 +2522,8 @@ define <vscale x 4 x i32> @vsaddu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
;
; VLOPT-LABEL: vsaddu_vv:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vsaddu.vv v10, v8, v10
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vsaddu.vv v10, v8, v10
; VLOPT-NEXT: vadd.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vsaddu(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
@@ -2543,9 +2542,8 @@ define <vscale x 4 x i32> @vsaddu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
;
; VLOPT-LABEL: vsaddu_vx:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vsaddu.vx v10, v8, a0
; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vsaddu.vx v10, v8, a0
; VLOPT-NEXT: vadd.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vsaddu(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
@@ -2564,9 +2562,8 @@ define <vscale x 4 x i32> @vsaddu_vi(<vscale x 4 x i32> %a, iXLen %vl) {
;
; VLOPT-LABEL: vsaddu_vi:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vsaddu.vi v10, v8, 5
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vsaddu.vi v10, v8, 5
; VLOPT-NEXT: vadd.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vsaddu(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 5, iXLen -1)
@@ -2585,9 +2582,8 @@ define <vscale x 4 x i32> @vsadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
;
; VLOPT-LABEL: vsadd_vv:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vsadd.vv v10, v8, v10
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vsadd.vv v10, v8, v10
; VLOPT-NEXT: vadd.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vsadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
@@ -2606,9 +2602,8 @@ define <vscale x 4 x i32> @vsadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
;
; VLOPT-LABEL: vsadd_vx:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vsadd.vx v10, v8, a0
; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vsadd.vx v10, v8, a0
; VLOPT-NEXT: vadd.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vsadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
@@ -2627,9 +2622,8 @@ define <vscale x 4 x i32> @vsadd_vi(<vscale x 4 x i32> %a, iXLen %vl) {
;
; VLOPT-LABEL: vsadd_vi:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vsadd.vi v10, v8, 5
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vsadd.vi v10, v8, 5
; VLOPT-NEXT: vadd.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vsadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 5, iXLen -1)
@@ -2648,9 +2642,8 @@ define <vscale x 4 x i32> @vssubu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
;
; VLOPT-LABEL: vssubu_vv:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vssubu.vv v10, v8, v10
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vssubu.vv v10, v8, v10
; VLOPT-NEXT: vadd.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vssubu(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
@@ -2669,9 +2662,8 @@ define <vscale x 4 x i32> @vssubu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
;
; VLOPT-LABEL: vssubu_vx:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vssubu.vx v10, v8, a0
; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vssubu.vx v10, v8, a0
; VLOPT-NEXT: vadd.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vssubu(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
@@ -2690,9 +2682,8 @@ define <vscale x 4 x i32> @vssub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
;
; VLOPT-LABEL: vssub_vv:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vssub.vv v10, v8, v10
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vssub.vv v10, v8, v10
; VLOPT-NEXT: vadd.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vssub(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
@@ -2711,9 +2702,8 @@ define <vscale x 4 x i32> @vssub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
;
; VLOPT-LABEL: vssub_vx:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vssub.vx v10, v8, a0
; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vssub.vx v10, v8, a0
; VLOPT-NEXT: vadd.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vssub(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
@@ -2734,9 +2724,8 @@ define <vscale x 4 x i32> @vsmul_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
; VLOPT-LABEL: vsmul_vv:
; VLOPT: # %bb.0:
; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vsmul.vv v8, v8, v10
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vsmul.vv v8, v8, v10
; VLOPT-NEXT: vadd.vv v8, v8, v10
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vsmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
@@ -2757,9 +2746,8 @@ define <vscale x 4 x i32> @vsmul_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
; VLOPT-LABEL: vsmul_vx:
; VLOPT: # %bb.0:
; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vsmul.vx v10, v8, a0
; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vsmul.vx v10, v8, a0
; VLOPT-NEXT: vadd.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vsmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen 0, iXLen -1)
@@ -2780,9 +2768,8 @@ define <vscale x 4 x i32> @vssrl_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
; VLOPT-LABEL: vssrl_vv:
; VLOPT: # %bb.0:
; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vssrl.vv v8, v8, v10
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vssrl.vv v8, v8, v10
; VLOPT-NEXT: vadd.vv v8, v8, v10
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vssrl.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
@@ -2803,9 +2790,8 @@ define <vscale x 4 x i32> @vssrl_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen %vl)
; VLOPT-LABEL: vssrl_vx:
; VLOPT: # %bb.0:
; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vssrl.vx v10, v8, a0
; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vssrl.vx v10, v8, a0
; VLOPT-NEXT: vadd.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vssrl.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %b, iXLen 0, iXLen -1)
@@ -2826,9 +2812,8 @@ define <vscale x 4 x i32> @vssrl_vi(<vscale x 4 x i32> %a, iXLen %vl) {
; VLOPT-LABEL: vssrl_vi:
; VLOPT: # %bb.0:
; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vssrl.vi v10, v8, 5
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vssrl.vi v10, v8, 5
; VLOPT-NEXT: vadd.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vssrl.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 5, iXLen 0, iXLen -1)
@@ -2849,9 +2834,8 @@ define <vscale x 4 x i32> @vssra_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
; VLOPT-LABEL: vssra_vv:
; VLOPT: # %bb.0:
; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vssra.vv v8, v8, v10
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vssra.vv v8, v8, v10
; VLOPT-NEXT: vadd.vv v8, v8, v10
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vssra.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
@@ -2872,9 +2856,8 @@ define <vscale x 4 x i32> @vssra_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen %vl)
; VLOPT-LABEL: vssra_vx:
; VLOPT: # %bb.0:
; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vssra.vx v10, v8, a0
; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vssra.vx v10, v8, a0
; VLOPT-NEXT: vadd.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vssra.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %b, iXLen 0, iXLen -1)
@@ -2895,9 +2878,8 @@ define <vscale x 4 x i32> @vssra_vi(<vscale x 4 x i32> %a, iXLen %vl) {
; VLOPT-LABEL: vssra_vi:
; VLOPT: # %bb.0:
; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vssra.vi v10, v8, 5
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vssra.vi v10, v8, 5
; VLOPT-NEXT: vadd.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vssra.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 5, iXLen 0, iXLen -1)
@@ -2918,9 +2900,8 @@ define <vscale x 4 x i32> @vnclipu_vv(<vscale x 4 x i64> %a, <vscale x 4 x i32>
; VLOPT-LABEL: vnclipu_vv:
; VLOPT: # %bb.0:
; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vnclipu.wv v14, v8, v12
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vnclipu.wv v14, v8, v12
; VLOPT-NEXT: vadd.vv v8, v14, v14
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vnclipu(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
@@ -2941,9 +2922,8 @@ define <vscale x 4 x i32> @vnclipu_vx(<vscale x 4 x i64> %a, iXLen %b, iXLen %vl
; VLOPT-LABEL: vnclipu_vx:
; VLOPT: # %bb.0:
; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vnclipu.wx v12, v8, a0
; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vnclipu.wx v12, v8, a0
; VLOPT-NEXT: vadd.vv v8, v12, v12
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vnclipu(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, iXLen %b, iXLen 0, iXLen -1)
@@ -2964,9 +2944,8 @@ define <vscale x 4 x i32> @vnclipu_vi(<vscale x 4 x i64> %a, iXLen %vl) {
; VLOPT-LABEL: vnclipu_vi:
; VLOPT: # %bb.0:
; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vnclipu.wi v12, v8, 5
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vnclipu.wi v12, v8, 5
; VLOPT-NEXT: vadd.vv v8, v12, v12
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vnclipu(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, iXLen 5, iXLen 0, iXLen -1)
@@ -2987,9 +2966,8 @@ define <vscale x 4 x i32> @vnclip_vv(<vscale x 4 x i64> %a, <vscale x 4 x i32> %
; VLOPT-LABEL: vnclip_vv:
; VLOPT: # %bb.0:
; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vnclip.wv v14, v8, v12
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vnclip.wv v14, v8, v12
; VLOPT-NEXT: vadd.vv v8, v14, v14
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vnclip(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
@@ -3010,9 +2988,8 @@ define <vscale x 4 x i32> @vnclip_vx(<vscale x 4 x i64> %a, iXLen %b, iXLen %vl)
; VLOPT-LABEL: vnclip_vx:
; VLOPT: # %bb.0:
; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vnclip.wx v12, v8, a0
; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vnclip.wx v12, v8, a0
; VLOPT-NEXT: vadd.vv v8, v12, v12
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vnclip(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, iXLen %b, iXLen 0, iXLen -1)
@@ -3033,9 +3010,8 @@ define <vscale x 4 x i32> @vnclip_vi(<vscale x 4 x i64> %a, iXLen %vl) {
; VLOPT-LABEL: vnclip_vi:
; VLOPT: # %bb.0:
; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vnclip.wi v12, v8, 5
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vnclip.wi v12, v8, 5
; VLOPT-NEXT: vadd.vv v8, v12, v12
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vnclip(<vscale x 4 x i32> poison, <vscale x 4 x i64> %a, iXLen 5, iXLen 0, iXLen -1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index 135f35305969028..0475a988e9851b0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -275,7 +275,7 @@ name: vxsat_dead
body: |
bb.0:
; CHECK-LABEL: name: vxsat_dead
- ; CHECK: %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vxsat
+ ; CHECK: %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vxsat
; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
%x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vxsat
%y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
More information about the llvm-commits
mailing list