[llvm] 5bbc5eb - [RISCV] Use _TIED form of VWADD(U)_WX/VWSUB(U)_WX to avoid early clobber.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Oct 1 16:41:57 PDT 2022
Author: Craig Topper
Date: 2022-10-01T16:34:39-07:00
New Revision: 5bbc5eb55f0fbb508aeeab8d86a5051f5ac1849f
URL: https://github.com/llvm/llvm-project/commit/5bbc5eb55f0fbb508aeeab8d86a5051f5ac1849f
DIFF: https://github.com/llvm/llvm-project/commit/5bbc5eb55f0fbb508aeeab8d86a5051f5ac1849f.diff
LOG: [RISCV] Use _TIED form of VWADD(U)_WX/VWSUB(U)_WX to avoid early clobber.
One of the sources is the same size as the destination, so that source is
allowed to overlap the destination register. By using the _TIED form we avoid
an early-clobber constraint for that source.
This matches what was already done for intrinsics. ConvertToThreeAddress
will fix it if it can't stay tied.
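For illustration, here is a reduced sketch of what the updated
vwadd-sdnode.ll test below checks (the IR mirrors the existing test and the
before/after assembly is taken from the diff): a widening add whose wide
operand matches the result register class used to need a scratch register
and a copy, and can now write its result in place.

  define <vscale x 1 x i64> @vwadd_wv(<vscale x 1 x i64> %va, <vscale x 1 x i32> %vb) {
    %vc = sext <vscale x 1 x i32> %vb to <vscale x 1 x i64>
    %vd = add <vscale x 1 x i64> %va, %vc
    ret <vscale x 1 x i64> %vd
  }

  ; before: vwadd.wv v10, v8, v9   ; early clobber forced a separate destination
  ;         vmv1r.v  v8, v10
  ; after:  vwadd.wv v8, v8, v9    ; destination tied to the wide source %va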
Added:
Modified:
llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll
llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index b7b25643e3971..067421f5a4deb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -362,8 +362,9 @@ multiclass VPatWidenBinarySDNode_WV_WX<SDNode op, PatFrags extop,
defvar wti = vtiToWti.Wti;
def : Pat<(op (wti.Vector wti.RegClass:$rs2),
(wti.Vector (extop (vti.Vector vti.RegClass:$rs1)))),
- (!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX)
- wti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>;
+ (!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX#"_TIED")
+ wti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
def : Pat<(op (wti.Vector wti.RegClass:$rs2),
(wti.Vector (extop (vti.Vector (SplatPat GPR:$rs1))))),
(!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 97defa90849a9..dc8a1bb8b5a3f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -374,6 +374,27 @@ multiclass VPatBinaryVL_V<SDNode vop,
(mask_type V0), GPR:$vl, sew, TAIL_AGNOSTIC)>;
}
+multiclass VPatTiedBinaryNoMaskVL_V<SDNode vop,
+ string instruction_name,
+ string suffix,
+ ValueType result_type,
+ ValueType op2_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op2_reg_class> {
+ def : Pat<(result_type (vop
+ (result_type result_reg_class:$rs1),
+ (op2_type op2_reg_class:$rs2),
+ srcvalue,
+ true_mask,
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_"#suffix#"_"# vlmul.MX#"_TIED")
+ result_reg_class:$rs1,
+ op2_reg_class:$rs2,
+ GPR:$vl, sew, TAIL_AGNOSTIC)>;
+}
+
multiclass VPatBinaryVL_XI<SDNode vop,
string instruction_name,
string suffix,
@@ -445,6 +466,9 @@ multiclass VPatBinaryWVL_VV_VX_WV_WX<SDNode vop, SDNode vop_w,
foreach VtiToWti = AllWidenableIntVectors in {
defvar vti = VtiToWti.Vti;
defvar wti = VtiToWti.Wti;
+ defm : VPatTiedBinaryNoMaskVL_V<vop_w, instruction_name, "WV",
+ wti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, wti.RegClass, vti.RegClass>;
defm : VPatBinaryVL_V<vop_w, instruction_name, "WV",
wti.Vector, wti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, wti.RegClass, wti.RegClass,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll
index c0df3405d462b..97fe17f275bec 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll
@@ -667,8 +667,8 @@ define <8 x i16> @vwadd_vx_v8i16_i16(<8 x i8>* %x, i16* %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vle8.v v9, (a0)
-; CHECK-NEXT: vlse16.v v10, (a1), zero
-; CHECK-NEXT: vwadd.wv v8, v10, v9
+; CHECK-NEXT: vlse16.v v8, (a1), zero
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
%a = load <8 x i8>, <8 x i8>* %x
%b = load i16, i16* %y
@@ -720,8 +720,8 @@ define <4 x i32> @vwadd_vx_v4i32_i32(<4 x i16>* %x, i32* %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vlse32.v v10, (a1), zero
-; CHECK-NEXT: vwadd.wv v8, v10, v9
+; CHECK-NEXT: vlse32.v v8, (a1), zero
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
%a = load <4 x i16>, <4 x i16>* %x
%b = load i32, i32* %y
@@ -743,8 +743,8 @@ define <2 x i64> @vwadd_vx_v2i64_i8(<2 x i32>* %x, i8* %y) nounwind {
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vwadd.wv v8, v10, v9
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: vwadd.wv v8, v8, v9
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -776,8 +776,8 @@ define <2 x i64> @vwadd_vx_v2i64_i16(<2 x i32>* %x, i16* %y) nounwind {
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vwadd.wv v8, v10, v9
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: vwadd.wv v8, v8, v9
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -809,8 +809,8 @@ define <2 x i64> @vwadd_vx_v2i64_i32(<2 x i32>* %x, i32* %y) nounwind {
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vwadd.wv v8, v10, v9
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: vwadd.wv v8, v8, v9
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -842,8 +842,8 @@ define <2 x i64> @vwadd_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind {
; RV32-NEXT: sw a2, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vwadd.wv v8, v10, v9
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: vwadd.wv v8, v8, v9
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -851,8 +851,8 @@ define <2 x i64> @vwadd_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT: vle32.v v9, (a0)
-; RV64-NEXT: vlse64.v v10, (a1), zero
-; RV64-NEXT: vwadd.wv v8, v10, v9
+; RV64-NEXT: vlse64.v v8, (a1), zero
+; RV64-NEXT: vwadd.wv v8, v8, v9
; RV64-NEXT: ret
%a = load <2 x i32>, <2 x i32>* %x
%b = load i64, i64* %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
index 7b94083d870b8..fda9bd8eab24f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
@@ -667,8 +667,8 @@ define <8 x i16> @vwaddu_vx_v8i16_i16(<8 x i8>* %x, i16* %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vle8.v v9, (a0)
-; CHECK-NEXT: vlse16.v v10, (a1), zero
-; CHECK-NEXT: vwaddu.wv v8, v10, v9
+; CHECK-NEXT: vlse16.v v8, (a1), zero
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
%a = load <8 x i8>, <8 x i8>* %x
%b = load i16, i16* %y
@@ -720,8 +720,8 @@ define <4 x i32> @vwaddu_vx_v4i32_i32(<4 x i16>* %x, i32* %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vlse32.v v10, (a1), zero
-; CHECK-NEXT: vwaddu.wv v8, v10, v9
+; CHECK-NEXT: vlse32.v v8, (a1), zero
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
%a = load <4 x i16>, <4 x i16>* %x
%b = load i32, i32* %y
@@ -742,8 +742,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i8(<2 x i32>* %x, i8* %y) nounwind {
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vwaddu.wv v8, v10, v9
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: vwaddu.wv v8, v8, v9
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -774,8 +774,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i16(<2 x i32>* %x, i16* %y) nounwind {
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vwaddu.wv v8, v10, v9
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: vwaddu.wv v8, v8, v9
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -806,8 +806,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i32(<2 x i32>* %x, i32* %y) nounwind {
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vwaddu.wv v8, v10, v9
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: vwaddu.wv v8, v8, v9
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -839,8 +839,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind {
; RV32-NEXT: sw a2, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vwaddu.wv v8, v10, v9
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: vwaddu.wv v8, v8, v9
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -848,8 +848,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT: vle32.v v9, (a0)
-; RV64-NEXT: vlse64.v v10, (a1), zero
-; RV64-NEXT: vwaddu.wv v8, v10, v9
+; RV64-NEXT: vlse64.v v8, (a1), zero
+; RV64-NEXT: vwaddu.wv v8, v8, v9
; RV64-NEXT: ret
%a = load <2 x i32>, <2 x i32>* %x
%b = load i64, i64* %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll
index 8ed07f041cbc0..4704a3219e30b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll
@@ -650,9 +650,9 @@ define <8 x i16> @vwsub_vx_v8i16_i8(<8 x i8>* %x, i8* %y) {
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT: lb a1, 0(a1)
; CHECK-NEXT: vle8.v v9, (a0)
-; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; CHECK-NEXT: vwsub.wv v8, v10, v9
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
%a = load <8 x i8>, <8 x i8>* %x
%b = load i8, i8* %y
@@ -669,8 +669,8 @@ define <8 x i16> @vwsub_vx_v8i16_i16(<8 x i8>* %x, i16* %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vle8.v v9, (a0)
-; CHECK-NEXT: vlse16.v v10, (a1), zero
-; CHECK-NEXT: vwsub.wv v8, v10, v9
+; CHECK-NEXT: vlse16.v v8, (a1), zero
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
%a = load <8 x i8>, <8 x i8>* %x
%b = load i16, i16* %y
@@ -687,9 +687,9 @@ define <4 x i32> @vwsub_vx_v4i32_i8(<4 x i16>* %x, i8* %y) {
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: lb a1, 0(a1)
; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vwsub.wv v8, v10, v9
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
%a = load <4 x i16>, <4 x i16>* %x
%b = load i8, i8* %y
@@ -707,9 +707,9 @@ define <4 x i32> @vwsub_vx_v4i32_i16(<4 x i16>* %x, i16* %y) {
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: lh a1, 0(a1)
; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vwsub.wv v8, v10, v9
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
%a = load <4 x i16>, <4 x i16>* %x
%b = load i16, i16* %y
@@ -726,8 +726,8 @@ define <4 x i32> @vwsub_vx_v4i32_i32(<4 x i16>* %x, i32* %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vlse32.v v10, (a1), zero
-; CHECK-NEXT: vwsub.wv v8, v10, v9
+; CHECK-NEXT: vlse32.v v8, (a1), zero
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
%a = load <4 x i16>, <4 x i16>* %x
%b = load i32, i32* %y
@@ -749,8 +749,8 @@ define <2 x i64> @vwsub_vx_v2i64_i8(<2 x i32>* %x, i8* %y) nounwind {
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vwsub.wv v8, v10, v9
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: vwsub.wv v8, v8, v9
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -759,9 +759,9 @@ define <2 x i64> @vwsub_vx_v2i64_i8(<2 x i32>* %x, i8* %y) nounwind {
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; RV64-NEXT: lb a1, 0(a1)
; RV64-NEXT: vle32.v v9, (a0)
-; RV64-NEXT: vmv.v.x v10, a1
+; RV64-NEXT: vmv.v.x v8, a1
; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vwsub.wv v8, v10, v9
+; RV64-NEXT: vwsub.wv v8, v8, v9
; RV64-NEXT: ret
%a = load <2 x i32>, <2 x i32>* %x
%b = load i8, i8* %y
@@ -784,8 +784,8 @@ define <2 x i64> @vwsub_vx_v2i64_i16(<2 x i32>* %x, i16* %y) nounwind {
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vwsub.wv v8, v10, v9
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: vwsub.wv v8, v8, v9
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -794,9 +794,9 @@ define <2 x i64> @vwsub_vx_v2i64_i16(<2 x i32>* %x, i16* %y) nounwind {
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; RV64-NEXT: lh a1, 0(a1)
; RV64-NEXT: vle32.v v9, (a0)
-; RV64-NEXT: vmv.v.x v10, a1
+; RV64-NEXT: vmv.v.x v8, a1
; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vwsub.wv v8, v10, v9
+; RV64-NEXT: vwsub.wv v8, v8, v9
; RV64-NEXT: ret
%a = load <2 x i32>, <2 x i32>* %x
%b = load i16, i16* %y
@@ -819,8 +819,8 @@ define <2 x i64> @vwsub_vx_v2i64_i32(<2 x i32>* %x, i32* %y) nounwind {
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vwsub.wv v8, v10, v9
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: vwsub.wv v8, v8, v9
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -829,9 +829,9 @@ define <2 x i64> @vwsub_vx_v2i64_i32(<2 x i32>* %x, i32* %y) nounwind {
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; RV64-NEXT: lw a1, 0(a1)
; RV64-NEXT: vle32.v v9, (a0)
-; RV64-NEXT: vmv.v.x v10, a1
+; RV64-NEXT: vmv.v.x v8, a1
; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vwsub.wv v8, v10, v9
+; RV64-NEXT: vwsub.wv v8, v8, v9
; RV64-NEXT: ret
%a = load <2 x i32>, <2 x i32>* %x
%b = load i32, i32* %y
@@ -854,8 +854,8 @@ define <2 x i64> @vwsub_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind {
; RV32-NEXT: sw a2, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vwsub.wv v8, v10, v9
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: vwsub.wv v8, v8, v9
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -863,8 +863,8 @@ define <2 x i64> @vwsub_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT: vle32.v v9, (a0)
-; RV64-NEXT: vlse64.v v10, (a1), zero
-; RV64-NEXT: vwsub.wv v8, v10, v9
+; RV64-NEXT: vlse64.v v8, (a1), zero
+; RV64-NEXT: vwsub.wv v8, v8, v9
; RV64-NEXT: ret
%a = load <2 x i32>, <2 x i32>* %x
%b = load i64, i64* %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll
index 2e0ba4885ee62..258fbbe676055 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll
@@ -650,9 +650,9 @@ define <8 x i16> @vwsubu_vx_v8i16_i8(<8 x i8>* %x, i8* %y) {
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT: lbu a1, 0(a1)
; CHECK-NEXT: vle8.v v9, (a0)
-; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; CHECK-NEXT: vwsubu.wv v8, v10, v9
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
%a = load <8 x i8>, <8 x i8>* %x
%b = load i8, i8* %y
@@ -669,8 +669,8 @@ define <8 x i16> @vwsubu_vx_v8i16_i16(<8 x i8>* %x, i16* %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vle8.v v9, (a0)
-; CHECK-NEXT: vlse16.v v10, (a1), zero
-; CHECK-NEXT: vwsubu.wv v8, v10, v9
+; CHECK-NEXT: vlse16.v v8, (a1), zero
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
%a = load <8 x i8>, <8 x i8>* %x
%b = load i16, i16* %y
@@ -687,9 +687,9 @@ define <4 x i32> @vwsubu_vx_v4i32_i8(<4 x i16>* %x, i8* %y) {
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: lbu a1, 0(a1)
; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vwsubu.wv v8, v10, v9
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
%a = load <4 x i16>, <4 x i16>* %x
%b = load i8, i8* %y
@@ -707,9 +707,9 @@ define <4 x i32> @vwsubu_vx_v4i32_i16(<4 x i16>* %x, i16* %y) {
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: lhu a1, 0(a1)
; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vwsubu.wv v8, v10, v9
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
%a = load <4 x i16>, <4 x i16>* %x
%b = load i16, i16* %y
@@ -726,8 +726,8 @@ define <4 x i32> @vwsubu_vx_v4i32_i32(<4 x i16>* %x, i32* %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vlse32.v v10, (a1), zero
-; CHECK-NEXT: vwsubu.wv v8, v10, v9
+; CHECK-NEXT: vlse32.v v8, (a1), zero
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
%a = load <4 x i16>, <4 x i16>* %x
%b = load i32, i32* %y
@@ -748,8 +748,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i8(<2 x i32>* %x, i8* %y) nounwind {
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vwsubu.wv v8, v10, v9
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: vwsubu.wv v8, v8, v9
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -758,9 +758,9 @@ define <2 x i64> @vwsubu_vx_v2i64_i8(<2 x i32>* %x, i8* %y) nounwind {
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; RV64-NEXT: lbu a1, 0(a1)
; RV64-NEXT: vle32.v v9, (a0)
-; RV64-NEXT: vmv.v.x v10, a1
+; RV64-NEXT: vmv.v.x v8, a1
; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vwsubu.wv v8, v10, v9
+; RV64-NEXT: vwsubu.wv v8, v8, v9
; RV64-NEXT: ret
%a = load <2 x i32>, <2 x i32>* %x
%b = load i8, i8* %y
@@ -782,8 +782,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i16(<2 x i32>* %x, i16* %y) nounwind {
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vwsubu.wv v8, v10, v9
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: vwsubu.wv v8, v8, v9
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -792,9 +792,9 @@ define <2 x i64> @vwsubu_vx_v2i64_i16(<2 x i32>* %x, i16* %y) nounwind {
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; RV64-NEXT: lhu a1, 0(a1)
; RV64-NEXT: vle32.v v9, (a0)
-; RV64-NEXT: vmv.v.x v10, a1
+; RV64-NEXT: vmv.v.x v8, a1
; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vwsubu.wv v8, v10, v9
+; RV64-NEXT: vwsubu.wv v8, v8, v9
; RV64-NEXT: ret
%a = load <2 x i32>, <2 x i32>* %x
%b = load i16, i16* %y
@@ -816,8 +816,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i32(<2 x i32>* %x, i32* %y) nounwind {
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vwsubu.wv v8, v10, v9
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: vwsubu.wv v8, v8, v9
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -826,9 +826,9 @@ define <2 x i64> @vwsubu_vx_v2i64_i32(<2 x i32>* %x, i32* %y) nounwind {
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; RV64-NEXT: lwu a1, 0(a1)
; RV64-NEXT: vle32.v v9, (a0)
-; RV64-NEXT: vmv.v.x v10, a1
+; RV64-NEXT: vmv.v.x v8, a1
; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT: vwsubu.wv v8, v10, v9
+; RV64-NEXT: vwsubu.wv v8, v8, v9
; RV64-NEXT: ret
%a = load <2 x i32>, <2 x i32>* %x
%b = load i32, i32* %y
@@ -851,8 +851,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind {
; RV32-NEXT: sw a2, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vwsubu.wv v8, v10, v9
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: vwsubu.wv v8, v8, v9
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -860,8 +860,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT: vle32.v v9, (a0)
-; RV64-NEXT: vlse64.v v10, (a1), zero
-; RV64-NEXT: vwsubu.wv v8, v10, v9
+; RV64-NEXT: vlse64.v v8, (a1), zero
+; RV64-NEXT: vwsubu.wv v8, v8, v9
; RV64-NEXT: ret
%a = load <2 x i32>, <2 x i32>* %x
%b = load i64, i64* %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
index f538817e8772f..5c23f9815887c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
@@ -62,8 +62,7 @@ define <vscale x 1 x i64> @vwadd_wv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
; CHECK-LABEL: vwadd_wv_nxv1i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
-; CHECK-NEXT: vwadd.wv v10, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
%vc = sext <vscale x 1 x i32> %vb to <vscale x 1 x i64>
%vd = add <vscale x 1 x i64> %va, %vc
@@ -74,8 +73,7 @@ define <vscale x 1 x i64> @vwaddu_wv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
; CHECK-LABEL: vwaddu_wv_nxv1i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
-; CHECK-NEXT: vwaddu.wv v10, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
%vc = zext <vscale x 1 x i32> %vb to <vscale x 1 x i64>
%vd = add <vscale x 1 x i64> %va, %vc
@@ -168,8 +166,7 @@ define <vscale x 2 x i64> @vwadd_wv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
; CHECK-LABEL: vwadd_wv_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT: vwadd.wv v12, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vwadd.wv v8, v8, v10
; CHECK-NEXT: ret
%vc = sext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
%vd = add <vscale x 2 x i64> %va, %vc
@@ -180,8 +177,7 @@ define <vscale x 2 x i64> @vwaddu_wv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
; CHECK-LABEL: vwaddu_wv_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT: vwaddu.wv v12, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vwaddu.wv v8, v8, v10
; CHECK-NEXT: ret
%vc = zext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
%vd = add <vscale x 2 x i64> %va, %vc
@@ -274,8 +270,7 @@ define <vscale x 4 x i64> @vwadd_wv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
; CHECK-LABEL: vwadd_wv_nxv4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT: vwadd.wv v16, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vwadd.wv v8, v8, v12
; CHECK-NEXT: ret
%vc = sext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
%vd = add <vscale x 4 x i64> %va, %vc
@@ -286,8 +281,7 @@ define <vscale x 4 x i64> @vwaddu_wv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
; CHECK-LABEL: vwaddu_wv_nxv4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT: vwaddu.wv v16, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vwaddu.wv v8, v8, v12
; CHECK-NEXT: ret
%vc = zext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
%vd = add <vscale x 4 x i64> %va, %vc
@@ -380,8 +374,7 @@ define <vscale x 8 x i64> @vwadd_wv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
; CHECK-LABEL: vwadd_wv_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT: vwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwadd.wv v8, v8, v16
; CHECK-NEXT: ret
%vc = sext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
%vd = add <vscale x 8 x i64> %va, %vc
@@ -392,8 +385,7 @@ define <vscale x 8 x i64> @vwaddu_wv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
; CHECK-LABEL: vwaddu_wv_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT: vwaddu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwaddu.wv v8, v8, v16
; CHECK-NEXT: ret
%vc = zext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
%vd = add <vscale x 8 x i64> %va, %vc
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
index d25a574a438af..c5b19d321c0df 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
@@ -62,8 +62,7 @@ define <vscale x 1 x i64> @vwsub_wv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
; CHECK-LABEL: vwsub_wv_nxv1i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
-; CHECK-NEXT: vwsub.wv v10, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
%vc = sext <vscale x 1 x i32> %vb to <vscale x 1 x i64>
%vd = sub <vscale x 1 x i64> %va, %vc
@@ -74,8 +73,7 @@ define <vscale x 1 x i64> @vwsubu_wv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
; CHECK-LABEL: vwsubu_wv_nxv1i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
-; CHECK-NEXT: vwsubu.wv v10, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
%vc = zext <vscale x 1 x i32> %vb to <vscale x 1 x i64>
%vd = sub <vscale x 1 x i64> %va, %vc
@@ -168,8 +166,7 @@ define <vscale x 2 x i64> @vwsub_wv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
; CHECK-LABEL: vwsub_wv_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT: vwsub.wv v12, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vwsub.wv v8, v8, v10
; CHECK-NEXT: ret
%vc = sext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
%vd = sub <vscale x 2 x i64> %va, %vc
@@ -180,8 +177,7 @@ define <vscale x 2 x i64> @vwsubu_wv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
; CHECK-LABEL: vwsubu_wv_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT: vwsubu.wv v12, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vwsubu.wv v8, v8, v10
; CHECK-NEXT: ret
%vc = zext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
%vd = sub <vscale x 2 x i64> %va, %vc
@@ -274,8 +270,7 @@ define <vscale x 4 x i64> @vwsub_wv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
; CHECK-LABEL: vwsub_wv_nxv4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT: vwsub.wv v16, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vwsub.wv v8, v8, v12
; CHECK-NEXT: ret
%vc = sext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
%vd = sub <vscale x 4 x i64> %va, %vc
@@ -286,8 +281,7 @@ define <vscale x 4 x i64> @vwsubu_wv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
; CHECK-LABEL: vwsubu_wv_nxv4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT: vwsubu.wv v16, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vwsubu.wv v8, v8, v12
; CHECK-NEXT: ret
%vc = zext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
%vd = sub <vscale x 4 x i64> %va, %vc
@@ -380,8 +374,7 @@ define <vscale x 8 x i64> @vwsub_wv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
; CHECK-LABEL: vwsub_wv_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT: vwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsub.wv v8, v8, v16
; CHECK-NEXT: ret
%vc = sext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
%vd = sub <vscale x 8 x i64> %va, %vc
@@ -392,8 +385,7 @@ define <vscale x 8 x i64> @vwsubu_wv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
; CHECK-LABEL: vwsubu_wv_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT: vwsubu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsubu.wv v8, v8, v16
; CHECK-NEXT: ret
%vc = zext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
%vd = sub <vscale x 8 x i64> %va, %vc