[llvm] [RISCV] Allow folding vmerge with implicit merge operand when true has tied dest (PR #78565)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 4 02:22:27 PDT 2024
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/78565
From c26dd23b2ba595f4e1766195fe6a7c3bfbaad3e7 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 18 Jan 2024 17:43:39 +0700
Subject: [PATCH 1/2] Add tests for cases where we could fold a vmerge into its
ops, but don't due to it having an implicit merge operand.
---
.../RISCV/rvv/rvv-peephole-vmerge-vops.ll | 45 +++++++++++++++++++
1 file changed, 45 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index 183741dd1ac33..d66ab34efe3a8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -1144,3 +1144,48 @@ define <vscale x 2 x double> @vpmerge_vfwsub.w_tied(<vscale x 2 x double> %passt
%b = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %mask, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl)
ret <vscale x 2 x double> %b
}
+
+; FIXME: We don't currently handle vmerge with an implicit passthru if the true
+; operand also has a tied dest. This could be folded into a masked vmacc with ta
+; policy.
+define <vscale x 2 x i32> @true_tied_dest_vmerge_implicit_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl) {
+; CHECK-LABEL: true_tied_dest_vmerge_implicit_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vmacc.vv v11, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %avl, i64 0)
+ %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
+ <vscale x 2 x i32> poison,
+ <vscale x 2 x i32> %passthru,
+ <vscale x 2 x i32> %a,
+ <vscale x 2 x i1> %m,
+ i64 %avl
+ )
+ ret <vscale x 2 x i32> %b
+}
+
+; FIXME: We don't currently handle vmerge with an implicit passthru if the true
+; operand also has a tied dest, e.g. has a passthru since it's a masked
+; pseudo. This could be folded into a masked vadd with ta policy.
+define <vscale x 2 x i32> @true_mask_vmerge_implicit_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl) {
+; CHECK-LABEL: true_mask_vmerge_implicit_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vadd.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv.v.v v8, v11
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl, i64 0)
+ %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
+ <vscale x 2 x i32> poison,
+ <vscale x 2 x i32> %passthru,
+ <vscale x 2 x i32> %a,
+ <vscale x 2 x i1> shufflevector(<vscale x 2 x i1> insertelement(<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer),
+ i64 %avl
+ )
+ ret <vscale x 2 x i32> %b
+}
From e68def333f735fa4613094fecddcbd4774665c9f Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 18 Jan 2024 18:28:09 +0700
Subject: [PATCH 2/2] [RISCV] Allow folding vmerge with implicit merge operand
when true has tied dest
We currently don't fold a vmerge if it has an implicit merge operand and its
true operand has a tied dest (i.e. has a passthru operand).

This restriction was added in https://reviews.llvm.org/D151596, back when we
had separate TU/TA pseudos. It looks like it was added because the policy
might not have been handled correctly.

However, the policy should be set correctly if we relax this restriction
today, since we compute the policy differently now that we have removed the
TU/TA distinction in our pseudos.

We use a TUMU policy, and relax it to TAMU iff the vmerge's merge operand is
implicit. The reasoning is that the tail elements always come from the
vmerge's merge operand[1], so if the merge operand is implicit-def then the
tail is implicit-def, and hence tail agnostic.

[1] unless the VL was shrunk, in which case we conservatively use TUMU.
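
As a rough C++ sketch of that rule (the helper name and its parameters are
hypothetical, for illustration only; this is not the actual code in
RISCVISelDAGToDAG.cpp):

    #include <cstdint>

    // Policy operand encoding for RVV pseudos: bit 0 = tail agnostic (vta),
    // bit 1 = mask agnostic (vma); 0 means TUMU.
    constexpr uint64_t TAIL_AGNOSTIC = 1;

    // Default to TUMU, and relax to TAMU only when the vmerge's merge
    // operand is implicit-def and the VL was not shrunk, since the tail
    // elements come from the merge operand.
    uint64_t computeFoldedPolicy(bool MergeIsImplicitDef, bool VLWasShrunk) {
      uint64_t Policy = 0; // TUMU: tail undisturbed, mask undisturbed
      if (MergeIsImplicitDef && !VLWasShrunk)
        Policy |= TAIL_AGNOSTIC; // TAMU: tail agnostic, mask undisturbed
      return Policy;
    }
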
---
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 8 --
.../RISCV/rvv/rvv-peephole-vmerge-vops.ll | 16 +--
llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll | 110 +++++++-----------
3 files changed, 48 insertions(+), 86 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 8c1f8dca4e102..7bdd4f8f4dbc3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3749,11 +3749,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
// If True has a merge operand then it needs to be the same as vmerge's False,
// since False will be used for the result's merge operand.
if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
- // The vmerge instruction must be TU.
- // FIXME: This could be relaxed, but we need to handle the policy for the
- // resulting op correctly.
- if (isImplicitDef(Merge))
- return false;
SDValue MergeOpTrue = True->getOperand(0);
if (False != MergeOpTrue)
return false;
@@ -3763,9 +3758,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
// going to keep the mask from True.
if (IsMasked) {
assert(HasTiedDest && "Expected tied dest");
- // The vmerge instruction must be TU.
- if (isImplicitDef(Merge))
- return false;
// FIXME: Support mask agnostic True instruction which would have an
// undef merge operand.
if (Mask && !usesAllOnesMask(Mask, Glue))
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index d66ab34efe3a8..91a8677df3397 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -1151,11 +1151,8 @@ define <vscale x 2 x double> @vpmerge_vfwsub.w_tied(<vscale x 2 x double> %passt
define <vscale x 2 x i32> @true_tied_dest_vmerge_implicit_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl) {
; CHECK-LABEL: true_tied_dest_vmerge_implicit_passthru:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
-; CHECK-NEXT: vmv1r.v v11, v8
-; CHECK-NEXT: vmacc.vv v11, v9, v10
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmacc.vv v8, v9, v10, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %avl, i64 0)
%b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
@@ -1168,16 +1165,11 @@ define <vscale x 2 x i32> @true_tied_dest_vmerge_implicit_passthru(<vscale x 2 x
ret <vscale x 2 x i32> %b
}
-; FIXME: We don't currently handle vmerge with an implicit passthru if the true
-; operand also has a tied dest, e.g. has a passthru since it's a masked
-; pseudo. This could be folded into a masked vadd with ta policy.
define <vscale x 2 x i32> @true_mask_vmerge_implicit_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl) {
; CHECK-LABEL: true_mask_vmerge_implicit_passthru:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT: vmv1r.v v11, v8
-; CHECK-NEXT: vadd.vv v11, v9, v10, v0.t
-; CHECK-NEXT: vmv.v.v v8, v11
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT: ret
%a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl, i64 0)
%b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll
index 0322c1ab9f631..22ed56afbd94e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll
@@ -81,9 +81,8 @@ define <vscale x 1 x i8> @vmadd_vv_nxv1i8_ta(<vscale x 1 x i8> %a, <vscale x 1 x
define <vscale x 1 x i8> @vmadd_vx_nxv1i8_ta(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv1i8_ta:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vmacc.vx v9, a0, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
+; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
@@ -170,9 +169,8 @@ define <vscale x 2 x i8> @vmadd_vv_nxv2i8_ta(<vscale x 2 x i8> %a, <vscale x 2 x
define <vscale x 2 x i8> @vmadd_vx_nxv2i8_ta(<vscale x 2 x i8> %a, i8 %b, <vscale x 2 x i8> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv2i8_ta:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vmacc.vx v9, a0, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
+; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -259,9 +257,8 @@ define <vscale x 4 x i8> @vmadd_vv_nxv4i8_ta(<vscale x 4 x i8> %a, <vscale x 4 x
define <vscale x 4 x i8> @vmadd_vx_nxv4i8_ta(<vscale x 4 x i8> %a, i8 %b, <vscale x 4 x i8> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv4i8_ta:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vmacc.vx v9, a0, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
+; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -348,9 +345,8 @@ define <vscale x 8 x i8> @vmadd_vv_nxv8i8_ta(<vscale x 8 x i8> %a, <vscale x 8 x
define <vscale x 8 x i8> @vmadd_vx_nxv8i8_ta(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i8> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv8i8_ta:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vmacc.vx v9, a0, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
+; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -437,9 +433,8 @@ define <vscale x 16 x i8> @vmadd_vv_nxv16i8_ta(<vscale x 16 x i8> %a, <vscale x
define <vscale x 16 x i8> @vmadd_vx_nxv16i8_ta(<vscale x 16 x i8> %a, i8 %b, <vscale x 16 x i8> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv16i8_ta:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vmacc.vx v10, a0, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
+; CHECK-NEXT: vmadd.vx v8, a0, v10, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
@@ -526,9 +521,8 @@ define <vscale x 32 x i8> @vmadd_vv_nxv32i8_ta(<vscale x 32 x i8> %a, <vscale x
define <vscale x 32 x i8> @vmadd_vx_nxv32i8_ta(<vscale x 32 x i8> %a, i8 %b, <vscale x 32 x i8> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv32i8_ta:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vmacc.vx v12, a0, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu
+; CHECK-NEXT: vmadd.vx v8, a0, v12, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
@@ -618,9 +612,8 @@ define <vscale x 64 x i8> @vmadd_vv_nxv64i8_ta(<vscale x 64 x i8> %a, <vscale x
define <vscale x 64 x i8> @vmadd_vx_nxv64i8_ta(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv64i8_ta:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vmacc.vx v16, a0, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
+; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
%vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
@@ -707,9 +700,8 @@ define <vscale x 1 x i16> @vmadd_vv_nxv1i16_ta(<vscale x 1 x i16> %a, <vscale x
define <vscale x 1 x i16> @vmadd_vx_nxv1i16_ta(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv1i16_ta:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vmacc.vx v9, a0, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
%vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
@@ -796,9 +788,8 @@ define <vscale x 2 x i16> @vmadd_vv_nxv2i16_ta(<vscale x 2 x i16> %a, <vscale x
define <vscale x 2 x i16> @vmadd_vx_nxv2i16_ta(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv2i16_ta:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vmacc.vx v9, a0, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
%vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -885,9 +876,8 @@ define <vscale x 4 x i16> @vmadd_vv_nxv4i16_ta(<vscale x 4 x i16> %a, <vscale x
define <vscale x 4 x i16> @vmadd_vx_nxv4i16_ta(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv4i16_ta:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vmacc.vx v9, a0, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
%vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -974,9 +964,8 @@ define <vscale x 8 x i16> @vmadd_vv_nxv8i16_ta(<vscale x 8 x i16> %a, <vscale x
define <vscale x 8 x i16> @vmadd_vx_nxv8i16_ta(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv8i16_ta:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vmacc.vx v10, a0, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT: vmadd.vx v8, a0, v10, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
%vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -1063,9 +1052,8 @@ define <vscale x 16 x i16> @vmadd_vv_nxv16i16_ta(<vscale x 16 x i16> %a, <vscale
define <vscale x 16 x i16> @vmadd_vx_nxv16i16_ta(<vscale x 16 x i16> %a, i16 %b, <vscale x 16 x i16> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv16i16_ta:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vmacc.vx v12, a0, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT: vmadd.vx v8, a0, v12, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
%vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
@@ -1155,9 +1143,8 @@ define <vscale x 32 x i16> @vmadd_vv_nxv32i16_ta(<vscale x 32 x i16> %a, <vscale
define <vscale x 32 x i16> @vmadd_vx_nxv32i16_ta(<vscale x 32 x i16> %a, i16 %b, <vscale x 32 x i16> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv32i16_ta:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vmacc.vx v16, a0, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
+; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
%vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
@@ -1244,9 +1231,8 @@ define <vscale x 1 x i32> @vmadd_vv_nxv1i32_ta(<vscale x 1 x i32> %a, <vscale x
define <vscale x 1 x i32> @vmadd_vx_nxv1i32_ta(<vscale x 1 x i32> %a, i32 %b, <vscale x 1 x i32> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv1i32_ta:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vmacc.vx v9, a0, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
@@ -1333,9 +1319,8 @@ define <vscale x 2 x i32> @vmadd_vv_nxv2i32_ta(<vscale x 2 x i32> %a, <vscale x
define <vscale x 2 x i32> @vmadd_vx_nxv2i32_ta(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i32> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv2i32_ta:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vmacc.vx v9, a0, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT: vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -1422,9 +1407,8 @@ define <vscale x 4 x i32> @vmadd_vv_nxv4i32_ta(<vscale x 4 x i32> %a, <vscale x
define <vscale x 4 x i32> @vmadd_vx_nxv4i32_ta(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv4i32_ta:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vmacc.vx v10, a0, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT: vmadd.vx v8, a0, v10, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -1511,9 +1495,8 @@ define <vscale x 8 x i32> @vmadd_vv_nxv8i32_ta(<vscale x 8 x i32> %a, <vscale x
define <vscale x 8 x i32> @vmadd_vx_nxv8i32_ta(<vscale x 8 x i32> %a, i32 %b, <vscale x 8 x i32> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv8i32_ta:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vmacc.vx v12, a0, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT: vmadd.vx v8, a0, v12, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -1603,9 +1586,8 @@ define <vscale x 16 x i32> @vmadd_vv_nxv16i32_ta(<vscale x 16 x i32> %a, <vscale
define <vscale x 16 x i32> @vmadd_vx_nxv16i32_ta(<vscale x 16 x i32> %a, i32 %b, <vscale x 16 x i32> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv16i32_ta:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vmacc.vx v16, a0, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
+; CHECK-NEXT: vmadd.vx v8, a0, v16, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
@@ -1739,9 +1721,8 @@ define <vscale x 1 x i64> @vmadd_vx_nxv1i64_ta(<vscale x 1 x i64> %a, i64 %b, <v
;
; RV64-LABEL: vmadd_vx_nxv1i64_ta:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; RV64-NEXT: vmacc.vx v9, a0, v8
-; RV64-NEXT: vmerge.vvm v8, v8, v9, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu
+; RV64-NEXT: vmadd.vx v8, a0, v9, v0.t
; RV64-NEXT: ret
%elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
%vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
@@ -1875,9 +1856,8 @@ define <vscale x 2 x i64> @vmadd_vx_nxv2i64_ta(<vscale x 2 x i64> %a, i64 %b, <v
;
; RV64-LABEL: vmadd_vx_nxv2i64_ta:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; RV64-NEXT: vmacc.vx v10, a0, v8
-; RV64-NEXT: vmerge.vvm v8, v8, v10, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu
+; RV64-NEXT: vmadd.vx v8, a0, v10, v0.t
; RV64-NEXT: ret
%elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
%vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
@@ -2011,9 +1991,8 @@ define <vscale x 4 x i64> @vmadd_vx_nxv4i64_ta(<vscale x 4 x i64> %a, i64 %b, <v
;
; RV64-LABEL: vmadd_vx_nxv4i64_ta:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; RV64-NEXT: vmacc.vx v12, a0, v8
-; RV64-NEXT: vmerge.vvm v8, v8, v12, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
+; RV64-NEXT: vmadd.vx v8, a0, v12, v0.t
; RV64-NEXT: ret
%elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
%vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
@@ -2150,9 +2129,8 @@ define <vscale x 8 x i64> @vmadd_vx_nxv8i64_ta(<vscale x 8 x i64> %a, i64 %b, <v
;
; RV64-LABEL: vmadd_vx_nxv8i64_ta:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: vmacc.vx v16, a0, v8
-; RV64-NEXT: vmerge.vvm v8, v8, v16, v0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT: vmadd.vx v8, a0, v16, v0.t
; RV64-NEXT: ret
%elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
%vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer